In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from IPython.display import Markdown

In [None]:
# Survey table: read the CSV, then key the rows by the "dog.id" column.
dogs = pd.read_csv("psy_data.csv", header=0)
dogs = dogs.set_index("dog.id")
display(dogs.head())

# Companion file; header=1 takes the column names from the file's second line.
dog_info = pd.read_csv("psy_info.csv", header=1)

# Peek at the column names from position 10 onwards (at most 90 of them).
print(dogs.columns[10:100])


In [None]:
# Weight vs. aggression.
# Top panel: histogram of raw weights.
# Bottom panel: mean of three aggression columns per whole-kilogram weight
# class, restricted to weights inside the 1.5 * IQR fences.

# Floor the weight so that dogs fall into whole-kilogram classes.
dogs["rounded_weight"] = np.floor(dogs["weight_kg"])
display(dogs.sort_values("weight_kg").loc[:, ["weight_kg", "rounded_weight", "Aggressive_dogs_samegender"]].head(15))

# 1.5 * IQR fences on the rounded weight; rows outside them are excluded
# from the per-weight means below.
q1 = dogs["rounded_weight"].quantile(0.25)
q3 = dogs["rounded_weight"].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5*iqr
upper = q3 + 1.5*iqr

figure, axes = plt.subplots(2, 1, figsize = (15, 15), sharey = False)
sns.histplot(data = dogs, x = "weight_kg", ax = axes[0])
axes[0].set_title("Distribution of dog weight")

# Column -> legend label, in plotting order (labels kept as they were).
aggression_labels = {
    "Aggressive_dogs_samegender": "Aggressive_samegender",
    "Aggressive_people": "Aggressive_people",
    "Aggressive_dogs_oppositegender": "Aggressive_dogs_oppositegender",
}

# Filter and group once. Selecting the columns *before* .mean() averages only
# what is plotted; the positional argument of GroupBy.mean is `numeric_only`,
# not a column name, so it must not be used for selection.
mean_aggression_per_weight = (
    dogs.query("rounded_weight > @lower and rounded_weight < @upper")
        .groupby("rounded_weight")[list(aggression_labels)]
        .mean()
)

# Single-column frames indexed by rounded_weight, under their original names.
aggression_per_weight_samegender = mean_aggression_per_weight[["Aggressive_dogs_samegender"]]
aggression_per_weight_people = mean_aggression_per_weight[["Aggressive_people"]]
aggression_per_weight_oppositegender = mean_aggression_per_weight[["Aggressive_dogs_oppositegender"]]

for aggression_col, legend_label in aggression_labels.items():
    # "rounded_weight" is the index name here; seaborn resolves named index
    # levels the same way it resolves columns.
    sns.lineplot(data = mean_aggression_per_weight[[aggression_col]], x = "rounded_weight", y = aggression_col, label = legend_label, ax = axes[1])

# The automatic y-label would name a single column although three are drawn.
axes[1].set_ylabel("Mean aggression score")
axes[1].set_title("Mean aggression by rounded weight (kg)")

In [None]:
# Count plots of three behaviour columns, one facet per "obtained_from" value.
# `grid` collects the FacetGrid objects in the order they are created.
grid = []
for behavior_col in ["Sensitive_touch", "Anxious", "Human_oriented"]:
    # Pin one category order for all facets. Without `order`, each facet
    # orders only the values it observes, so the same bar position can mean
    # different values in different facets.
    category_order = sorted(dogs[behavior_col].dropna().unique())
    facet = sns.FacetGrid(data=dogs, col='obtained_from', sharey=False)
    facet.map(sns.countplot, behavior_col, order=category_order)
    grid.append(facet)

# plt.show() does not accept a figure/grid argument; one call renders every
# open figure.
plt.show()

# Reshape to long format: one row per (dog, behaviour column) pair, with the
# column name in "Behavior" and its value in "Score".
dogs_melted = dogs.melt(
    id_vars=['obtained_from'],
    value_vars=['Sensitive_touch', 'Anxious', 'Human_oriented'],
    var_name='Behavior',
    value_name='Score',
)

# Mean score per (Behavior, obtained_from) pair; the group keys are then
# turned back into ordinary columns. ("priemery" is Slovak for "averages".)
priemery = dogs_melted.groupby(["Behavior", "obtained_from"])["Score"].mean()
priemery = priemery.reset_index()

# Noise sensitivity against four behaviour scores.
# Each score is cut into 49 equal-width bins (50 edges). Bars show the mean
# noise_sensitivity_score per bin; dots show the individual dogs that sit in
# bins holding fewer than 10 dogs.
score_columns = ["dog_aggression_score", "owner_aggression_score", "stranger_aggression_score", "barking_score"]

priemerydict = {}  # score column -> frame of mean noise sensitivity per bin
booldict = {}      # score column -> per-bin flag, True when the bin holds < 10 dogs
edgesdict = {}     # score column -> the 50 bin edges (reused for the tick labels)
for score_col in score_columns:
    edgesdict[score_col] = np.linspace(dogs[score_col].min(), dogs[score_col].max(), 50)
    # include_lowest=True keeps the dogs holding the minimum score: the bins
    # are right-closed, so a first edge equal to the minimum would otherwise
    # leave those dogs unbinned (NaN).
    dogs[score_col + "_interval"] = pd.cut(x = dogs[score_col], bins = edgesdict[score_col], include_lowest = True)
    priemerydict[score_col] = dogs.groupby(score_col + "_interval", observed = False)["noise_sensitivity_score"].mean().reset_index()
    booldict[score_col] = dogs.reset_index().groupby(score_col + "_interval", observed = False)["dog.id"].count() < 10

figure, axes = plt.subplots(2, 2, figsize = (12, 10))
for i, n in enumerate(score_columns):
    ax = axes[i//2][i%2]
    interval_col = n + "_interval"

    sns.barplot(data = priemerydict[n], x = interval_col, y = "noise_sensitivity_score", ax = ax, color = "maroon")

    # Row mask for dogs in sparse bins. With observed=False the grouped flags
    # come back in category order, so a dog's category code indexes them
    # directly; code -1 marks a dog that fell into no bin.
    bin_codes = dogs[interval_col].cat.codes.to_numpy()
    bin_is_sparse = booldict[n].to_numpy()
    in_sparse_bin = (bin_codes >= 0) & bin_is_sparse[bin_codes]
    sns.stripplot(data = dogs[in_sparse_bin], x = interval_col, y = "noise_sensitivity_score", ax = ax, color = "goldenrod")

    # One label per bin: its left edge, taken from *this* score's edges.
    # Every second label is blanked to keep the axis readable.
    labels = np.round(edgesdict[n][:-1], decimals = 2).astype(str)
    labels[1::2] = ""
    # Fix the tick positions first so the number of labels matches the ticks.
    ax.set_xticks(np.arange(len(labels)))
    ax.set_xticklabels(labels, rotation = 45)

    ax.set_title(f"Noise sensitivity by {n.replace('_', ' ')}")
    ax.set_xlabel(n)

figure.subplots_adjust(hspace=0.5)
# Interactive bar chart of the mean scores in `priemery`: one facet per
# Behavior value, up to three facets per row.
px.bar(
    priemery,
    x="Score",
    y="obtained_from",
    facet_col="Behavior",
    facet_col_wrap=3,
).show()