# Exploration

How does cycling in Stockholm change once autumn break ends and the city becomes darker, colder, and busier?
I built this data analysis project to explore how cyclist behavior shifts before and after Höstlov, and what factors might explain those patterns.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import requests
import pytz
from astral import LocationInfo
from astral.sun import sun
from scipy import stats

# Plotly renderer configuration.
# FIX: stop Plotly from opening browser windows when a figure is shown.
# NOTE(review): the "vscode" renderer presumably assumes this notebook is run
# inside VS Code — confirm, and switch to one of the alternatives listed on the
# assignment line when using a different front end.
import plotly.io as pio
pio.renderers.default = "vscode"   # or "notebook" or "notebook_connected"


## Data Loading and Cleaning

In [2]:
# Load the four raw CSV inputs (paths are relative to the working directory).
# Two files use pandas' default comma separator; the two cycling files are
# semicolon-separated.
df_dst = pd.read_csv(
    'stockholm_dst_end_2015_2025.csv',
    encoding='utf-8',
)
df_cykel = pd.read_csv(
    'td_cykel_15_min_2015-2024.csv',
    sep=';',
    encoding='ascii',  # NOTE(review): strict ASCII fails on any non-ASCII byte — confirm the file is pure ASCII
)
df_platser = pd.read_csv(
    'platser_cykel_2015-2024.csv',
    sep=';',
    encoding='utf-8',
)
df_hostlov = pd.read_csv('hostlov_2015_2023.csv')

In [None]:
# I build monthly totals for each year (2015–2023) and plot one line per year.

# I make sure the timestamps are real datetimes before using the `.dt` accessor.
# A column read by `pd.read_csv` without `parse_dates` is plain text, and `.dt`
# on text raises AttributeError. Columns that are already datetime are left as-is,
# and `df_cykel` itself is not modified.
# NOTE(review): assumes the strings parse to a single datetime dtype (no mixed
# UTC offsets) — confirm against the CSV.
timestamps = df_cykel['timestamp_local']
if not pd.api.types.is_datetime64_any_dtype(timestamps):
    timestamps = pd.to_datetime(timestamps)

# I keep only data between 2015 and 2023 (both years included)
mask = timestamps.dt.year.between(2015, 2023)
df_sub = df_cykel.loc[mask, ['timestamp_local', 'antal']].copy()
# The subset carries the parsed timestamps so the `.dt` calls below always work.
df_sub['timestamp_local'] = timestamps.loc[mask]

# I extract year and month from the timestamp
df_sub['year'] = df_sub['timestamp_local'].dt.year
df_sub['month'] = df_sub['timestamp_local'].dt.month

# I group by year and month and sum the counts
monthly_by_year = (
    df_sub
    .groupby(['year', 'month'], as_index=False)['antal']
    .sum()
    .rename(columns={'antal': 'monthly_total'})
)

# I create readable month names
month_map = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
    5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug',
    9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'
}
monthly_by_year['month_name'] = monthly_by_year['month'].map(month_map)

# I check the first few rows
print(monthly_by_year.head())

# I plot one line per year, on an explicit figure/axes pair so the plot does not
# depend on matplotlib's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(13, 6))
years_sorted = sorted(monthly_by_year['year'].unique())

sns.lineplot(
    data=monthly_by_year,
    x='month',
    y='monthly_total',
    hue='year',
    hue_order=years_sorted,
    marker='o',
    palette='tab20',
    ax=ax
)

ax.set_title('Monthly Cycling Counts by Year (2015–2023)')
ax.set_xlabel('Month')
ax.set_ylabel('Total cyclists')
# Show month abbreviations instead of the numbers 1–12 on the x axis.
ax.set_xticks(list(range(1, 13)))
ax.set_xticklabels([month_map[m] for m in range(1, 13)])
# The legend sits outside the axes on the right so it does not cover the lines.
ax.legend(title='Year', bbox_to_anchor=(1.02, 1), loc='upper left', ncol=1)
fig.tight_layout()
plt.show()
