In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from IPython.display import Markdown

In [None]:
# Load the survey table and key it by the "dog.id" column.
dogs = (
    pd.read_csv("psy_data.csv", header=0)
    .set_index("dog.id")
)
display(dogs.head())

# Companion table; header=1 takes the column names from row 1 (0-based) of the file.
dog_info = pd.read_csv("psy_info.csv", header=1)

# Quick look at the column names from position 10 up to (not including) 100.
print(dogs.columns[10:100])


In [None]:
# Weight vs. aggression: a histogram of raw weights (top panel) and the mean of
# three aggression columns per whole kilogram of body weight (bottom panel).

# Floor the weight to whole kilograms so dogs can be grouped per kilogram.
dogs["rounded_weight"] = np.floor(dogs["weight_kg"])
display(
    dogs.sort_values("weight_kg")
    .loc[:, ["weight_kg", "rounded_weight", "Aggressive_dogs_samegender"]]
    .head(15)
)

# 1.5 * IQR fences on the floored weight; only weights strictly inside the
# fences are used for the per-kilogram means below.
q1 = dogs["rounded_weight"].quantile(0.25)
q3 = dogs["rounded_weight"].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr

figure, axes = plt.subplots(2, 1, figsize=(15, 15), sharey=False)
sns.histplot(data=dogs, x="weight_kg", ax=axes[0])

# Aggression column -> legend label (labels kept exactly as before).
# Dict order is the plotting order, so line colours stay the same.
aggression_labels = {
    "Aggressive_dogs_samegender": "Aggressive_samegender",
    "Aggressive_people": "Aggressive_people",
    "Aggressive_dogs_oppositegender": "Aggressive_dogs_oppositegender",
}

# Filter once and select the columns *before* aggregating.
# The previous `.mean("<column>")` passed the column name as the positional
# `numeric_only` argument, which only worked because a non-empty string is
# truthy, and it averaged every numeric column just to keep one of them.
mean_aggression_per_weight = (
    dogs.query("rounded_weight > @lower and rounded_weight < @upper")
    .groupby("rounded_weight")[list(aggression_labels)]
    .mean()
)

# Single-column frames under their original names, in case later cells use them.
aggression_per_weight_samegender = mean_aggression_per_weight[["Aggressive_dogs_samegender"]]
aggression_per_weight_people = mean_aggression_per_weight[["Aggressive_people"]]
aggression_per_weight_oppositegender = mean_aggression_per_weight[["Aggressive_dogs_oppositegender"]]

# "rounded_weight" is the index of the aggregated frame; seaborn resolves it by name.
for column, label in aggression_labels.items():
    sns.lineplot(
        data=mean_aggression_per_weight,
        x="rounded_weight",
        y=column,
        label=label,
        ax=axes[1],
    )

# Three different columns share this axis, so give it a neutral label instead of
# letting it carry the name of a single column.
axes[1].set_ylabel("Mean aggression score")

In [None]:
# --- Behaviour score distributions per acquisition source -------------------
# One FacetGrid per behaviour column, faceted by "obtained_from".
behavior_columns = ["Sensitive_touch", "Anxious", "Human_oriented"]

grid = []
for behavior in behavior_columns:
    facets = sns.FacetGrid(data=dogs, col="obtained_from", sharey=False)
    facets.map(sns.histplot, behavior)
    grid.append(facets)

# plt.show() renders every open figure; it does not take a figure/grid argument
# (the previous `plt.show(gr)` passed the grid into an unrelated parameter).
plt.show()

# --- Mean behaviour score per acquisition source ----------------------------
dogs_melted = dogs.melt(
    id_vars=["obtained_from"],
    value_vars=behavior_columns,
    var_name="Behavior",
    value_name="Score",
)

# "priemery" = means: average score per (behaviour, source) pair.
priemery = dogs_melted.groupby(["Behavior", "obtained_from"])["Score"].mean().reset_index()

# --- Noise sensitivity per dog-aggression interval --------------------------
# 50 equally spaced edges -> 49 intervals spanning the observed score range.
bin_edges = np.linspace(
    dogs["dog_aggression_score"].min(),
    dogs["dog_aggression_score"].max(),
    50,
)
# include_lowest=True closes the first interval on the left; without it the
# dogs with the minimum score fall outside every bin and get a NaN interval.
dogs["interval"] = pd.cut(x=dogs["dog_aggression_score"], bins=bin_edges, include_lowest=True)
priemery2 = dogs.groupby("interval", observed=False)["noise_sensitivity_score"].mean().reset_index()

# True for intervals that contain fewer than 10 dogs.
boolseries = dogs.reset_index().groupby("interval", observed=False)["dog.id"].count() < 10

# Rows belonging to those sparsely populated intervals. `isin` replaces the
# previous `replace(boolseries).fillna(False)` chain, which depended on
# categorical-specific `replace` behaviour (believed deprecated in newer pandas).
sparse_intervals = boolseries[boolseries].index
in_sparse_interval = dogs["interval"].isin(sparse_intervals).to_numpy()
dogs_in_sparse_intervals = dogs.loc[in_sparse_interval]

# Draw on a dedicated figure rather than whatever axes happen to be current.
_, axe = plt.subplots(figsize=(15, 6))
sns.barplot(data=priemery2, x="interval", y="noise_sensitivity_score", ax=axe)
# Overlay the individual dogs for the sparse intervals (presumably because a
# mean over so few dogs is not very informative on its own).
sns.stripplot(data=dogs_in_sparse_intervals, x="interval", y="noise_sensitivity_score", ax=axe)

# One tick label per bar: the lower edge of each interval. Using all 50 edges
# gave one label more than there are bars. Every second label is blanked to
# keep the axis readable.
labels = np.array(np.round(bin_edges[:-1], decimals=2), dtype=str)
labels[1::2] = ""
axe.set_xticks(np.arange(len(labels)))
axe.set_xticklabels(labels, rotation=45)
axe.set_xlabel("dog_aggression_score (interval lower edge)")

px.bar(data_frame=priemery, x="Score", y="obtained_from", facet_col="Behavior", facet_col_wrap=3).show()