In [43]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import kaleido
import plotly.io as pio

## Change file path as needed!!!!
wd = 'F:\\github\\doon_pet_survey_graphs\\'

## Load csv files for cat and dog
cat = pd.read_csv(f'{wd}cat_combine.csv')
dog = pd.read_csv(f'{wd}dog_combine.csv')

col_scale = ['#169999','#555486', '#999999']

cat_order_dict = {
    'cat_age':['Kitten (0-6 months)','Junior (7 months - 2 years)', 'Adult (3-6 years)','Mature (>7 years)'],
    'cat_sex' : ['Female', 'Male'] ,
    'cat_neutered' : ['Neutered', 'Not neutered'],
    'cat_describe' : ["Indoor-outdoor cat (the cat wanders outside on its own, but you feed it and look after it when it is sick)",'Completely indoor cat (always stays at home and does not go out on its own'],
    'cat_feed_freq' : ['Once a day', 'Twice a day', 'Thrice a day', 'Continuous supply of food is available' ],
    'cat_time_out': ['Completely indoors', '1-3 hours', '3-5 hours', '5-7 hours', '>7 hours'],
    'cat_stay' : ['At home', 'Outside', 'Either at home or outside'],
    'cat_hunt_yn': ['Yes', 'No'],
    'cat_hunt_yn_fix': ['Yes', 'No'],
    'cat_hunt_freq': ['Once a week', 'Once in 15 days', 'Once a month','Once every few months'], 
    'cat_time': ['<20 minutes', '20 minutes-1hour','1-2 hours','2-4 hours','>4 hours'],
    'cat_hunt_fix' : ['Rodents', 'Insects', 'Birds', 'Amphibians', 'Reptiles']
    }

In [44]:
def nice_graph(df, x_var, col_var, 
               graph_title, x_tit, legend_tit, 
               h, w, 
               an_x,an_y,
               dof, alpha, chi2, crit, p):
    fig = px.histogram(df, x_var, color= col_var,
                   # Graph title
                   title = graph_title, 
                   # Set color sequence
                   color_discrete_sequence= col_scale,
                   # Define required order of categories
                   category_orders= { x_var : cat_order_dict[x_var]}, 
                   # Include labels
                   text_auto=True
                   )
    fig.update_yaxes(title = 'Number of cats', # Y-axis title
                 # Set the grid format
                 showline=True, linewidth=1, linecolor='Black',
                 gridcolor='Grey', gridwidth=0.75)
    
    fig.update_xaxes(title = x_tit, # X-axis title
                 showline=True, linewidth=1, linecolor='Black')

    fig.update_layout(showlegend=True, # Display legend
                  # Legend title
                  legend_title_text= legend_tit, 
                  # Background color
                  plot_bgcolor='White', 
                  # Define range of y axis
                  yaxis_range=[0,max(df[x_var].value_counts()*1.1)],
                  # Change size of graph
                  autosize=False,width=w,height=h
                  )

    fig.add_annotation(text=
                   f'Chi2 value= {chi2} <br> alpha= {alpha}, dof={dof} <br> Critical value = {crit} <br> P value = {p}',
                   align = 'left',
                  xref="paper", yref="paper",
                  x=an_x, y=an_y, 
                  bordercolor='Black',borderwidth=1,
                  showarrow=False)
    
     # Save figure as png
    pio.write_image(fig, f'{wd}stack_{x_var}and{col_var}.png', engine="kaleido")
    
    # Display success message
    print(f'Prepared stacked bar graph for {x_var} and {col_var}')
    
    fig.show()

In [45]:
# 1. Plot of hunting activity of cats across age groups

#Chi square association
#Result for cat_hunt_yn_fix and cat_age: 
#reject h0, there is a relationship 
# Chi square value = 11.809170244367774 
# Critical value = 7.814727903251179 
# dof=3
# P value = 0.008066287380498083


nice_graph(df= cat, x_var= 'cat_age',
col_var= 'cat_hunt_yn_fix',
graph_title= 'Plot of hunting activity of cats across age groups',
x_tit= 'Cat age groups',
legend_tit= 'Cats that hunt',
h=800,w=800,
an_x=1,an_y=0.76, 
dof=3, alpha=0.05, chi2=11.80, crit=7.8, p=0.08)





Prepared stacked bar graph for cat_age and cat_hunt_yn_fix


In [46]:
# 2. Plot of cat hunting and time spent outside

#Result for cat_hunt_yn_fix and cat_time_out: 
#reject h0, there is a relationship 
# Chi square value = 27.40818514439656 
# Critical value = 9.487729036781154
# P value = 1.643714720800382e-05
# dof = 4

nice_graph(df = cat, x_var= 'cat_time_out',
col_var= 'cat_hunt_yn_fix',
graph_title= 'Plot of cat hunting and time spent outside',
x_tit= 'Time spent outside',
legend_tit= 'Cats that hunt',
h=400,w=800,
an_x=1.22,an_y=0.33, 
dof=4, alpha=0.05, chi2=27.4, crit=9.48, p='<0.001')





Prepared stacked bar graph for cat_time_out and cat_hunt_yn_fix


In [47]:
#Result for cat_hunt_yn_fix and cat_feed_freq: 
#reject h0, there is a relationship 
# Chi square value = 11.707137937819756 
# Critical value = 7.814727903251179 
# P value = 0.008456773291222408
# dof =3

nice_graph(df = cat, x_var= 'cat_feed_freq',
col_var= 'cat_hunt_yn_fix',
graph_title= 'Plot of cat hunting and feeding frequency',
x_tit= 'Feeding frequency',
legend_tit= 'Cats that hunt',
h=800,w=800,
an_x=1.2,an_y=0.81, 
dof=3, alpha=0.05, chi2=11.70, crit=7.81, p=0.008)





Prepared stacked bar graph for cat_feed_freq and cat_hunt_yn_fix


In [48]:
#Result for cat_hunt_freq and cat_neutered: 
#reject h0, there is a relationship 
# Chi square value = 8.40737017854709 
# Critical value = 7.814727903251179 
# P value = 0.03830173782538393
# dof =3

nice_graph(df = cat, x_var= 'cat_neutered',
col_var= 'cat_hunt_yn_fix',
graph_title= 'Plot of cat hunting and neutered status',
x_tit= 'Cats neutered status',
legend_tit= 'Cats that hunt',
h=800,w=400,
an_x=1.65,an_y=0.76, 
dof=3, alpha=0.05, chi2=8.40, crit=7.81, p=0.03)





Prepared stacked bar graph for cat_neutered and cat_hunt_yn_fix


In [49]:


#Result for cat_hunt and cat_neutered: 
#reject h0, there is a relationship 
# Chi square value = 12.531169871794873 
# Critical value = 11.070497693516351
# P value = 0.028191580540761763
# dof =5

# Select only required columns from the dataframe
df1 = cat[['cat_hunt', 'cat_neutered']]
# Remove multiple entries in one line
df1['cat_hunt_fix'] = df1['cat_hunt'].str.split(';')
df2 = df1.explode('cat_hunt_fix').reset_index(drop=True)
# Remove blank values
df3 = df2[df2['cat_hunt_fix']!='']
# Remove text inside ()
df3['cat_hunt_fix'] = df3['cat_hunt_fix'].str.split('(').str.get(0).str.strip()

nice_graph(df = df3, x_var= 'cat_hunt_fix',
col_var= 'cat_neutered',
graph_title= 'Plot of types of prey by cat neutered status',
x_tit= 'Types of prey',
legend_tit= 'Cats neutered status',
h=400,w=800,
an_x=1.3,an_y=0.50, 
dof=5, alpha=0.05, chi2=12.53, crit=11.07, p=0.02)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





Prepared stacked bar graph for cat_hunt_fix and cat_neutered


In [50]:


#Result for cat_hunt and cat_stay: 
#reject h0, there is a relationship 
# Chi square value = 21.717692991586432 
# Critical value = 18.307038053275146 
# P value = 0.01660917632535741
# dof =10

# Select only required columns from the dataframe
df1 = cat[['cat_hunt', 'cat_stay']]
# Remove multiple entries in one line
df1['cat_hunt_fix'] = df1['cat_hunt'].str.split(';')
df2 = df1.explode('cat_hunt_fix').reset_index(drop=True)
# Remove blank values
df3 = df2[df2['cat_hunt_fix']!='']
# Remove text inside ()
df3['cat_hunt_fix'] = df3['cat_hunt_fix'].str.split('(').str.get(0).str.strip()

nice_graph(df = df3, x_var= 'cat_hunt_fix',
col_var= 'cat_stay',
graph_title= 'Plot of types of prey and time spent outside',
x_tit= 'Types of prey',
legend_tit= 'Cat description',
h=400,w=800,
an_x=1.3,an_y=0.45, 
dof=4, alpha=0.05, chi2=27.4, crit=9.48, p='<0.001')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





Prepared stacked bar graph for cat_hunt_fix and cat_stay
