In [5]:
import pandas as pd

# Read the preprocessed car-sharing CSV into `df`; the statistical tests in
# the following cell read its feature columns and the 'demand' column.
DATA_PATH = "CarSharing_Preprocessed.csv"
df = pd.read_csv(DATA_PATH)

In [6]:
from scipy.stats import kruskal, mannwhitneyu, spearmanr

# ---------------------------------------------------------------------------
# Spearman rank correlation: each listed numerical column is correlated
# against 'demand', and the coefficient and p-value are printed.
# ---------------------------------------------------------------------------
print("\nSpearmans:")
numeric_columns = ('temp', 'temp_feel', 'humidity', 'windspeed')
for column in numeric_columns:
    spearman_corr, p_value = spearmanr(df[column], df['demand'])
    print(f"Spearman's correlation for {column} vs demand: Correlation coefficient = {spearman_corr}, p-value = {p_value}")

# ---------------------------------------------------------------------------
# Mann-Whitney U test: 'holiday' and 'workingday' are expected to be binary.
# Only the first two distinct values (in order of appearance) are compared,
# so the reported U statistic belongs to the first value encountered.
# ---------------------------------------------------------------------------
print("\nMann-Whitney:")
binary_columns = ('holiday', 'workingday')
for column in binary_columns:
    levels = df[column].unique()
    first_demand = df.loc[df[column] == levels[0], 'demand']
    second_demand = df.loc[df[column] == levels[1], 'demand']

    stat, p_value = mannwhitneyu(first_demand, second_demand)
    print(f"Mann-Whitney U test for {column} vs demand: U-statistic = {stat}, p-value = {p_value}")

# ---------------------------------------------------------------------------
# Kruskal-Wallis H test: 'season' and 'weather' are split into one demand
# sample per distinct value, and all samples are tested together.
# ---------------------------------------------------------------------------
print("\nkruskal:")
multilevel_columns = ('season', 'weather')
for column in multilevel_columns:
    demand_samples = []
    for level in df[column].unique():
        level_mask = df[column] == level
        demand_samples.append(df.loc[level_mask, 'demand'].values)
    stat, p_value = kruskal(*demand_samples)
    print(f"Kruskal-Wallis H test for {column} vs demand: H-statistic = {stat}, p-value = {p_value}")
print("\n")


Spearmans:
Spearman's correlation for temp vs demand: Correlation coefficient = 0.43844572215931016, p-value = 0.0
Spearman's correlation for temp_feel vs demand: Correlation coefficient = 0.44102614045558325, p-value = 0.0
Spearman's correlation for humidity vs demand: Correlation coefficient = -0.35812997188117063, p-value = 7.73530011757583e-262
Spearman's correlation for windspeed vs demand: Correlation coefficient = 0.1359876278300755, p-value = 3.225447389750297e-37

Mann-Whitney:
Mann-Whitney U test for holiday vs demand: U-statistic = 1027283.5, p-value = 0.6909487336672528
Mann-Whitney U test for workingday vs demand: U-statistic = 8230365.0, p-value = 0.827954509566204

kruskal:
Kruskal-Wallis H test for season vs demand: H-statistic = 573.2373249072575, p-value = 6.382038538465807e-124
Kruskal-Wallis H test for weather vs demand: H-statistic = 198.87050806598123, p-value = 7.399641641145851e-43


