In [None]:
import gc
import os
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

from data import Data
from tools import DataTools

## 🕒 Step 1: Define the Date for Analysis

In [None]:
# Reference moment for the analysis, parsed from a "YYYY-MM-DD HH:MM" literal.
specified_time = datetime.strptime("2022-11-29 10:00", "%Y-%m-%d %H:%M")
# Four-digit year as text; later cells interpolate it into the name patterns
# handed to DataTools.open_files_in_directory.
formatted_time = f"{specified_time:%Y}"

## 📂 Step 2: Load Station Data

In [None]:
# Build the data locations with os.path.join so the separator matches the host OS.
# The previous literals hard-coded Windows backslashes, which do not resolve to a
# directory on POSIX systems; on Windows the resulting strings are unchanged.
path = os.path.join("..", "data_files")
data_directory = os.path.join(path, "data")
# Column names applied to the raw bike files in a later cell via DataTools.rename_header.
correct_header_data_bikes = ["city", "id", "request_date", "datetime", "bikes"]

# Open the "bike_station" files under `path` (tab passed as the third argument,
# presumably the field separator) and keep only the two cities under study.
station_data = DataTools.open_files_in_directory(path, "bike_station", "\t")
bike_station = Data()
bike_station.get_data(station_data)
bike_station.filter_dataframes("city", ["amiens", "marseille"])
# The raw handle is no longer needed; later cells read the result via bike_station.data.
del station_data

## 🧪 Step 3: Load Pollution Station, Weather, and Pollution Data

In [None]:
def _load_filtered(directory, pattern, column, keep):
    """Open the files selected by `pattern` under `directory` (with "," as the third
    argument to DataTools.open_files_in_directory), wrap them in a Data object and
    filter `column` down to the values listed in `keep`.

    Returns the populated Data object; the intermediate result of
    open_files_in_directory is local to this call and released on return.
    """
    opened = DataTools.open_files_in_directory(directory, pattern, ",")
    holder = Data()
    holder.get_data(opened)
    holder.filter_dataframes(column, keep)
    return holder


# Pollution-station records for the two cities (lower-case "city" values).
pollution_station = _load_filtered(path, "pollution_station", "city", ["amiens", "marseille"])

# Weather records for the analysis year, filtered on the capitalised "name" column.
data_weather = _load_filtered(
    data_directory, f"weather_{formatted_time}", "name", ["Amiens", "Marseille"]
)

# Pollution records for the analysis year.
# NOTE(review): only "Amiens" is kept here, yet a later cell also correlates
# Marseille usage against data_pollution - confirm this is intended.
data_pollution = _load_filtered(data_directory, f"pollution_{formatted_time}", "name", ["Amiens"])

## 🚲 Step 4: Load Bike Data and Merge Station Metadata

In [None]:
# Open this year's bike files (tab passed as the third argument) and rename their
# header to the agreed column names, keeping the old header as requested.
bike_data = DataTools.rename_header(
    DataTools.open_files_in_directory(data_directory, f"bike_{formatted_time}", "\t"),
    correct_header_data_bikes,
    keep_old_header=True,
)

# Wrap the renamed files and narrow them to the two cities under study.
data_bike = Data()
data_bike.get_data(bike_data)
data_bike.filter_dataframes("city", ["amiens", "marseille"])
del bike_data

# Merge the bike records with the bike-station table; "id" is passed for both key
# arguments, followed by the station columns (presumably the ones to bring across).
data_bike.data = DataTools.merge_dataframes(
    data_bike.data,
    bike_station.data,
    "id",
    "id",
    ["bike_stands", "latitude", "longitude", "id_pollution"],
)

# Same call shape for pollution records against the pollution-station table.
data_pollution.data = DataTools.merge_dataframes(
    data_pollution.data,
    pollution_station.data,
    "id",
    "id",
    ["latitude", "longitude"],
)

# The station tables are not referenced again in the cells below.
del pollution_station, bike_station

## 📊 Step 5: Calculate Bike Usage Capacity and Statistics

In [None]:
# Capacity figure per city from DataTools.calul_capacity, evaluated in the same
# order as before (Amiens, then Marseille) and printed below as a number of slots.
bike_count_amiens, bike_count_marseille = (
    DataTools.calul_capacity(data_bike.data, city) for city in ("amiens", "marseille")
)

print(f"Number of slots in Amiens: {bike_count_amiens}")
print(f"Number of slots in Marseille: {bike_count_marseille}")


def _usage_for(city):
    """Run DataTools.calculate_use on the bike rows whose "city" equals `city`.

    Returns whatever calculate_use returns; the cell unpacks it into three values.
    """
    in_city = data_bike.data["city"] == city
    return DataTools.calculate_use(data_bike.data[in_city])


dailyuse_amiens, period_use_amiens, useperhour_amiens = _usage_for("amiens")
dailyuse_marseille, period_use_marseille, useperhour_marseille = _usage_for("marseille")

## 🔍 Step 6: Perform Correlation Analysis

In [None]:
# The weather frame was filtered to keep BOTH "Amiens" and "Marseille" rows, so each
# city's usage must be paired with its own weather rows; previously the whole
# two-city frame was passed for both cities. The boolean mask mirrors the one used
# on data_bike.data["city"] when the usage figures were computed.
weather_amiens = data_weather.data[data_weather.data["name"] == "Amiens"]
weather_marseille = data_weather.data[data_weather.data["name"] == "Marseille"]

# Amiens: daily usage against Amiens weather, then against pollution readings.
DataTools.corr_analysis(
    [dailyuse_amiens, weather_amiens],
    ["total_bikes_used", ["temp", "temp_max", "temp_min", "humidity", "speed", "clouds"]],
)

DataTools.corr_analysis(
    [dailyuse_amiens, data_pollution.data],
    ["total_bikes_used", ["NO", "NO2", "NOX as NO2", "O3", "PM10", "PM2.5"]],
)

# Marseille: daily usage against Marseille weather.
DataTools.corr_analysis(
    [dailyuse_marseille, weather_marseille],
    ["total_bikes_used", ["temp", "temp_max", "temp_min", "humidity", "speed", "clouds"]],
)

# NOTE(review): data_pollution was filtered to "Amiens" only when it was loaded, so
# this compares Marseille usage with Amiens pollution readings. Left unchanged
# because no Marseille pollution data is loaded in this notebook - confirm whether
# this call should be dropped or Marseille pollution data added.
DataTools.corr_analysis(
    [dailyuse_marseille, data_pollution.data],
    ["total_bikes_used", ["NO", "NO2", "NOX as NO2", "O3", "PM10", "PM2.5"]],
)

## ✅ Final Step: Clean Up

In [None]:
# Final cell: request a garbage-collection pass and report completion.
# NOTE(review): the cells shown never delete data_bike, data_weather, data_pollution
# or the usage results, so those objects stay referenced and are not reclaimed here.
gc.collect()
print("Program completed successfully. Memory freed.")