In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px
import numpy as np
import seaborn as sns
from scipy.stats import pearsonr, spearmanr

In [None]:
# Load the raw rental history from the parent directory.
# A forward slash is used as separator: it resolves on Windows and POSIX alike,
# whereas a backslash path only resolves on Windows and puts an invalid "\e"
# escape sequence into the string literal (a warning on recent Python versions).
df = pd.read_parquet("../escooter_history_2022_new.parquet.gzip")

In [None]:
def _first_mode(values):
    """Return a single most-frequent value of ``values``.

    ``Series.mode`` returns *all* modes (several on ties, none for an all-NaN
    group), which is not a scalar aggregate. The first entry of its result is
    used; NaN is returned when there is no mode at all.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")


# One row per (date, hour, weekday, daytype) group: number of rentals plus the
# mode / mean of each weather attribute and the sum of `registered_customer`.
df_agg = df.groupby(["date", "hour", "weekday", "daytype"], as_index=False).agg(
    total_rental=("datetime", "count"),
    temp_mode=("temp", _first_mode),
    temp_mean=("temp", "mean"),
    atemp_mode=("atemp", _first_mode),
    atemp_mean=("atemp", "mean"),
    humidity_mode=("humidity", _first_mode),
    humidity_mean=("humidity", "mean"),
    weather_mode=("weather", _first_mode),
    windspeed_mode=("windspeed", _first_mode),
    windspeed_mean=("windspeed", "mean"),
    total_registered=("registered_customer", "sum"),
).round()  # round() with no argument rounds the numeric columns to 0 decimals

# Unregistered rentals are the remainder of the rental count after the registered sum.
df_agg["total_unregistered"] = df_agg["total_rental"] - df_agg["total_registered"]

# Timestamp of the hour slot: the date plus `hour` hours.
# The lowercase unit "h" is used because the uppercase alias "H" is deprecated
# in recent pandas releases.
df_agg["day_hour"] = (
    pd.to_datetime(df_agg["date"], dayfirst=False)
    + pd.to_timedelta(df_agg["hour"].astype(int), unit="h")
)

In [None]:
def normalized_df(param, aggregation, df=None):
    """
    Build a frame holding the average number of rentals per row for every
    distinct value of one attribute column.

    The input frame is grouped by the column ``f"{param}_{aggregation}"``
    (e.g. ``temp_mode``). For each distinct value the number of rows and the
    summed rentals are computed, and the sums are divided by the row count.
    With an hourly input frame this reads, for example, as "at 30 degrees
    there are on average 200 rentals per hour".

    Parameters
    ----------
    param : str
        Attribute name, e.g. ``"temp"``.
    aggregation : str
        Aggregation suffix of the attribute column, e.g. ``"mode"`` or ``"mean"``.
    df : pandas.DataFrame, optional
        Frame containing the columns ``f"{param}_{aggregation}"``, ``date``,
        ``total_rental``, ``total_registered`` and ``total_unregistered``.
        Defaults to the module-level ``df_agg`` as it exists at call time.

    Returns
    -------
    pandas.DataFrame
        One row per distinct attribute value with the columns ``count``,
        ``total_rental``, ``total_registered``, ``total_unregistered`` and
        ``mean_rental_per_{param}_hour`` (plus the ``_r`` / ``_u`` variants
        for registered / unregistered rentals).
    """
    if df is None:
        # Looked up on every call (not frozen into the signature), so a
        # re-created df_agg is picked up without redefining this function.
        df = df_agg

    group_column = f"{param}_{aggregation}"
    df_new = df.groupby(group_column, as_index=False).agg(
        count=("date", "count"),
        total_rental=("total_rental", "sum"),
        total_registered=("total_registered", "sum"),
        total_unregistered=("total_unregistered", "sum"),
    )

    mean_column = f"mean_rental_per_{param}_hour"
    df_new[mean_column] = df_new["total_rental"] / df_new["count"]
    df_new[f"{mean_column}_r"] = df_new["total_registered"] / df_new["count"]
    df_new[f"{mean_column}_u"] = df_new["total_unregistered"] / df_new["count"]
    return df_new


def pearson_for_df(df, param, aggregation):
    """
    Compute and print the Pearson correlation results for a frame shaped like
    the output of normalized_df.

    The attribute column ``f"{param}_{aggregation}"`` is correlated with the
    three mean-rental columns (total, registered, unregistered). ``param``
    (e.g. "temp") and ``aggregation`` (e.g. "mode") must match the frame's
    column names. Nothing is returned; the three results are printed.
    """
    attribute_values = df[f"{param}_{aggregation}"]
    # label printed in front of the result -> suffix of the mean-rental column
    column_suffix_by_label = {"total": "", "registered": "_r", "unregistered": "_u"}

    # All three correlations are computed before anything is printed.
    results = {
        label: pearsonr(attribute_values, df[f"mean_rental_per_{param}_hour{suffix}"])
        for label, suffix in column_suffix_by_label.items()
    }
    for label, result in results.items():
        print(f"{label}: {result}")

In [None]:
# Build one normalised frame per attribute and aggregation.
# Each pair is unpacked in the order ("mean", "mode"); weather has only a mode frame.
df_temp_mean, df_temp_mode = (normalized_df("temp", agg) for agg in ("mean", "mode"))
df_atemp_mean, df_atemp_mode = (normalized_df("atemp", agg) for agg in ("mean", "mode"))
df_humidity_mean, df_humidity_mode = (
    normalized_df("humidity", agg) for agg in ("mean", "mode")
)
df_windspeed_mean, df_windspeed_mode = (
    normalized_df("windspeed", agg) for agg in ("mean", "mode")
)
df_weather_mode = normalized_df("weather", "mode")

In [None]:
# Mean rentals per hour against temp_mode, coloured by the `count` column,
# with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_temp_mode,
    x="temp_mode",
    y="mean_rental_per_temp_hour",
    color="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Spearman rank correlation between temp_mode and the mean rentals per hour.
spearmanr(
    df_temp_mode["temp_mode"],
    df_temp_mode["mean_rental_per_temp_hour"],
)

In [None]:
# Mean rentals per hour against temp_mean, coloured by the `count` column,
# with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_temp_mean,
    x="temp_mean",
    y="mean_rental_per_temp_hour",
    color="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Mean rentals per hour against atemp_mode, coloured by the `count` column
# (also used as hover title), with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_atemp_mode,
    x="atemp_mode",
    y="mean_rental_per_atemp_hour",
    color="count",
    hover_name="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Spearman rank correlation between atemp_mode and the mean rentals per hour.
spearmanr(
    df_atemp_mode["atemp_mode"],
    df_atemp_mode["mean_rental_per_atemp_hour"],
)

In [None]:
# Mean rentals per hour against atemp_mean, coloured by the `count` column
# (also used as hover title), with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_atemp_mean,
    x="atemp_mean",
    y="mean_rental_per_atemp_hour",
    color="count",
    hover_name="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Mean rentals per hour against windspeed_mode, coloured by the `count` column,
# with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_windspeed_mode,
    x="windspeed_mode",
    y="mean_rental_per_windspeed_hour",
    color="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Spearman rank correlation between windspeed_mode and the mean rentals per hour.
spearmanr(
    df_windspeed_mode["windspeed_mode"],
    df_windspeed_mode["mean_rental_per_windspeed_hour"],
)

In [None]:
# Mean rentals per hour against windspeed_mean, coloured by the `count` column,
# with an OLS trendline drawn in dark grey.
px.scatter(
    data_frame=df_windspeed_mean,
    x="windspeed_mean",
    y="mean_rental_per_windspeed_hour",
    color="count",
    trendline="ols",
    trendline_color_override="darkgrey",
)

In [None]:
# Bar chart of the mean rentals per hour for each weather_mode value,
# coloured by the `count` column.
px.bar(
    data_frame=df_weather_mode,
    x="weather_mode",
    y="mean_rental_per_weather_hour",
    color="count",
)