# Data Import

In [None]:
# DataVis.ipynb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the processed dataset from the 0_DataPreparation folder and print its
# column index as a quick overview of the available fields.
# To work with the imputed variant instead, point the path at
# "../0_DataPreparation/processed_data_imputed.csv".
processed_data_path = "../0_DataPreparation/processed_data.csv"
wetter_umsatzdaten_kiwo = pd.read_csv(processed_data_path)
print(wetter_umsatzdaten_kiwo.columns)


# Visualization of the distribution of weather and holiday data

In [None]:
# Distribution of Windgeschwindigkeit
# discrete=True draws one unit-width bar centred on each value. Passing the
# sorted unique values as bin edges would give one bin fewer than there are
# values (the two largest share the closed last bin) and would place NaN among
# the edges if the column has missing entries.
# NOTE(review): assumes wind speed is recorded in whole m/s — confirm.
plt.figure(figsize=(10, 6))
sns.histplot(wetter_umsatzdaten_kiwo['Windgeschwindigkeit'], kde=True, discrete=True)
plt.title('Distribution of Windgeschwindigkeit')
plt.xlabel('Windgeschwindigkeit (m/s)')
plt.ylabel('Frequency')
plt.show()

# Distribution of Temperatur
# Histogram with 45 equal-width bins and a KDE overlay. The list of unique
# temperatures is not needed for a fixed bin count, so it is not computed.
plt.figure(figsize=(10, 6))
sns.histplot(wetter_umsatzdaten_kiwo['Temperatur'], kde=True, bins=45)
plt.title('Distribution of Temperature')
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.show()

# Distribution of Bewoelkung
# discrete=True gives every value its own unit-width bar centred on the value.
# Using the sorted unique values as bin edges would merge the two largest
# values into the closed last bin and would place NaN among the edges if the
# column has missing entries.
# NOTE(review): assumes Bewoelkung holds integer cloud-cover codes — confirm.
plt.figure(figsize=(10, 6))
sns.histplot(wetter_umsatzdaten_kiwo['Bewoelkung'], kde=True, discrete=True)
plt.title('Distribution of Cloud Status')
plt.xlabel('Bewoelkung')
plt.ylabel('Frequency')
plt.show()

# Distribution of Wettercode
# discrete=True gives every code its own unit-width bar centred on the code.
# Using the sorted unique codes as bin edges would produce unevenly wide bins
# wherever codes are not consecutive, merge the two largest codes into the
# closed last bin, and place NaN among the edges if codes are missing.
# NOTE(review): assumes Wettercode holds integer codes — confirm.
plt.figure(figsize=(10, 6))
sns.histplot(wetter_umsatzdaten_kiwo['Wettercode'], kde=True, discrete=True)
plt.title('Distribution of Weather Code')
plt.xlabel('Weather Code')
plt.ylabel('Frequency')
plt.show()

# Distribution of Ferien
# No explicit bin count is passed: seaborn bins categorical data discretely on
# its own, and a count taken from .unique() would also include NaN as a value.
# NOTE(review): assumes Ferien holds holiday names as strings, with NaN where
# no holiday applies — confirm against the data-preparation step.
plt.figure(figsize=(10, 6))
sns.histplot(wetter_umsatzdaten_kiwo['Ferien'], kde=False)
plt.title('Distribution of Ferien', fontsize=20)
plt.xlabel('')
plt.ylabel('Frequency', fontsize=20)
plt.xticks(rotation=45, fontsize=20)
plt.yticks(fontsize=20)
plt.show()


# Visualization of the relationship between sales and features

In [None]:
# Mean Umsatz for each Ferien value, drawn with 95% confidence-interval bars
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=wetter_umsatzdaten_kiwo,
    x='Ferien',
    y='Umsatz',
    errorbar=('ci', 95),
    capsize=0.1,
    ax=ax,
)
ax.set_title('Mean Sales per Ferien Category with 95% Confidence Intervals')
ax.set_ylabel('Mean Sales')
ax.set_xlabel('Ferien Category')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# Sort the dataframe by date
wetter_umsatzdaten_kiwo = wetter_umsatzdaten_kiwo.sort_values(by='Datum')

# Sales per Ferien category, with missing Ferien values labelled 'Keine Ferien'
# The filled column is assigned back to the frame. Calling
# fillna(..., inplace=True) on the selected column is a chained in-place
# update, which pandas deprecates and which no longer modifies the parent
# frame once Copy-on-Write is active.
wetter_umsatzdaten_kiwo['Ferien'] = wetter_umsatzdaten_kiwo['Ferien'].fillna('Keine Ferien')
plt.figure(figsize=(15, 10))
sns.barplot(x='Ferien', y='Umsatz', data=wetter_umsatzdaten_kiwo, errorbar=('ci', 95), capsize=0.1)
plt.title('Mean Sales per Ferien Category with 95% Confidence Intervals', fontsize=20)
plt.ylabel('Mean Sales', fontsize=20)
plt.xlabel('')
plt.xticks(rotation=45, fontsize=20)
plt.yticks(fontsize=20)
plt.tight_layout()
plt.show()


# Mean Umsatz grouped by the Is_Ferien column, with 95% confidence-interval bars
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=wetter_umsatzdaten_kiwo,
    x='Is_Ferien',
    y='Umsatz',
    errorbar=('ci', 95),
    capsize=0.1,
    ax=ax,
)
ax.set_title('Mean Sales per No Ferien Category with 95% Confidence Intervals')
ax.set_ylabel('Mean Sales')
ax.set_xlabel('No Ferien Category')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# Mean Umsatz per Windgeschwindigkeit_Beaufort value, with 95% confidence-interval bars
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=wetter_umsatzdaten_kiwo,
    x='Windgeschwindigkeit_Beaufort',
    y='Umsatz',
    errorbar=('ci', 95),
    capsize=0.1,
    ax=ax,
)
ax.set_title('Mean Sales per Wind Speed with 95% Confidence Intervals')
ax.set_ylabel('Mean Sales')
ax.set_xlabel('Wind Speed (Beaufort)')
plt.show()

# Mean Umsatz per Temperature_Category, with 95% confidence-interval bars
# (larger fonts than the neighbouring plots)
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=wetter_umsatzdaten_kiwo,
    x='Temperature_Category',
    y='Umsatz',
    errorbar=('ci', 95),
    capsize=0.1,
    ax=ax,
)
ax.set_title('Mean Sales per Temperature Category with 95% Confidence Intervals', fontsize=20)
ax.set_ylabel('Mean Sales', fontsize=20)
ax.set_xlabel('Temperature Category', fontsize=20)
plt.xticks(rotation=45, fontsize=20)
plt.yticks(fontsize=20)
plt.tight_layout()
plt.show()


# Mean Umsatz per Cloud_Status value, with 95% confidence-interval bars
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=wetter_umsatzdaten_kiwo,
    x='Cloud_Status',
    y='Umsatz',
    errorbar=('ci', 95),
    capsize=0.1,
    ax=ax,
)
ax.set_title('Mean Sales per Cloud Status with 95% Confidence Intervals')
ax.set_ylabel('Mean Sales')
ax.set_xlabel('Cloud Status')
plt.show()

# Sales per Rain Status
# Uses errorbar=('ci', 95) like the other bar plots in this notebook; the
# older `ci=95` keyword is deprecated in seaborn in favour of `errorbar`.
plt.figure(figsize=(10, 6))
sns.barplot(x='Rain_Status', y='Umsatz', data=wetter_umsatzdaten_kiwo, errorbar=('ci', 95), capsize=0.1)
plt.title('Mean Sales per Rain Status with 95% Confidence Intervals')
plt.ylabel('Mean Sales')
plt.xlabel('Rain Status')
plt.show()



# Descriptive statistics of variables

In [None]:
# Print the describe() summary of each listed column, one after another
summary_columns = [
    'Windgeschwindigkeit',
    'Windgeschwindigkeit_Beaufort',
    'Temperatur',
    'Bewoelkung',
    'Cloud_Status',
    'Wettercode',
]
for column_name in summary_columns:
    print(wetter_umsatzdaten_kiwo[column_name].describe())

print("Data visualization complete.")