<h2>Unir varios CSV en uno</h2>

El porgrama lee un archivo .zip con los CSV dentro

In [5]:
import pandas as pd
import zipfile
import os
import glob

# ============================
# 📥 Ruta al archivo .zip
# ============================
zip_path = 'Bicicletas_Sitycleta.zip'  # 👈 cambia nombre si es distinto
extract_path = 'datasets/'

# ============================
# 🎯 Descomprimir archivo .zip
# ============================
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Archivo descomprimido en: {extract_path}")

# ============================
# 🔎 Buscar todos los archivos de Bicicletas disponibles dentro del zip
# ============================
file_list = glob.glob(os.path.join(extract_path, '*Bicicletas-disponibles-en-bases-*.xlsx')) #Modificar el nombre de los contenidos
print(f"Archivos encontrados: {len(file_list)}")

# ============================
# 📥 Leer y concatenar todos los archivos
# ============================
df_list = []
for file in file_list:
    df_temp = pd.read_excel(file)
    df_list.append(df_temp)

df_combined = pd.concat(df_list, ignore_index=True)

# ============================
# 🕑 Convertir a datetime y ordenar
# ============================
df_combined['Time stamp'] = pd.to_datetime(df_combined['Time stamp'])
df_combined.sort_values(by='Time stamp', inplace=True)

# ============================
# ✅ Mostrar dataset final combinado
# ============================
print(df_combined.head())
print(f"\nTotal de registros combinados: {df_combined.shape[0]}")

# ============================
# 💾 Guardar en CSV para trabajar después
# ============================
output_path = 'sitycleta_2024_2025_combined.csv'
df_combined.to_csv(output_path, index=False)
print(f"\nDataset combinado guardado en: {output_path}")

Archivo descomprimido en: datasets/
Archivos encontrados: 17
       Place number          Time stamp  Free bikes
95998          3400 2023-12-01 00:06:00           0
95999          3417 2023-12-01 00:08:00           7
96000          3472 2023-12-01 00:08:00           0
96001          3416 2023-12-01 00:12:00           9
96002          3412 2023-12-01 00:18:00           5

Total de registros combinados: 1444050

Dataset combinado guardado en: sitycleta_2024_2025_combined.csv


<h3>Añadir variables de calendario</h3>

In [3]:
import pandas as pd

# Cargar dataset
df = pd.read_csv('/content/sitycleta_2024_2025_combined.csv')
df['Time stamp'] = pd.to_datetime(df['Time stamp'])

# Crear variables de calendario
df['hour_of_day'] = df['Time stamp'].dt.hour
df['day_of_week'] = df['Time stamp'].dt.dayofweek   # Lunes=0, Domingo=6
df['is_weekend'] = df['day_of_week'].isin([5,6]).astype(int)
df['month'] = df['Time stamp'].dt.month
df['week_of_year'] = df['Time stamp'].dt.isocalendar().week

print(df.head())

   Place number          Time stamp  Free bikes  hour_of_day  day_of_week  \
0          3400 2023-12-01 00:06:00           0            0            4   
1          3417 2023-12-01 00:08:00           7            0            4   
2          3472 2023-12-01 00:08:00           0            0            4   
3          3416 2023-12-01 00:12:00           9            0            4   
4          3412 2023-12-01 00:18:00           5            0            4   

   is_weekend  month  week_of_year  
0           0     12            48  
1           0     12            48  
2           0     12            48  
3           0     12            48  
4           0     12            48  


<h3>Añadir festivos con python-holidays</h3>

In [7]:
!pip install holidays



In [2]:
import holidays

canarias_holidays = holidays.CountryHoliday('ES', subdiv='CN', years=[2024, 2025])

for date, name in sorted(canarias_holidays.items()):
    print(f"{date}: {name}")


2024-01-01: New Year's Day
2024-01-06: Epiphany
2024-03-28: Maundy Thursday
2024-03-29: Good Friday
2024-05-01: Labor Day
2024-05-30: Day of the Canary Islands
2024-08-15: Assumption Day
2024-10-12: National Day
2024-11-01: All Saints' Day
2024-12-06: Constitution Day
2024-12-25: Christmas Day
2025-01-01: New Year's Day
2025-01-06: Epiphany
2025-04-17: Maundy Thursday
2025-04-18: Good Friday
2025-05-01: Labor Day
2025-05-30: Day of the Canary Islands
2025-08-15: Assumption Day
2025-11-01: All Saints' Day
2025-12-06: Constitution Day
2025-12-08: Immaculate Conception
2025-12-25: Christmas Day


2024-01-01: Año Nuevo

2024-01-06: Epifanía del Señor

2024-03-28: Jueves Santo

2024-03-29: Viernes Santo

2024-05-01: Día del Trabajador

2024-05-30: Día de Canarias

2024-08-15: Asunción de la Virgen

2024-10-12: Fiesta Nacional de España

2024-11-01: Todos los Santos

2024-12-06: Día de la Constitución

2024-12-25: Navidad

<h3>Añadir datos meteorológicos (Meteostat)</h3>

In [1]:
!pip install meteostat

Collecting meteostat
  Downloading meteostat-1.6.8-py3-none-any.whl.metadata (4.6 kB)
Downloading meteostat-1.6.8-py3-none-any.whl (31 kB)
Installing collected packages: meteostat
Successfully installed meteostat-1.6.8


In [4]:
from meteostat import Hourly, Stations
from datetime import datetime

# Definir periodo de tiempo
start = datetime(2023, 12, 1)
end = datetime(2025, 4, 30)

# Buscar estación meteorológica más cercana a Las Palmas (aprox lat, lon)
stations = Stations()
stations = stations.nearby(28.1235, -15.4363)
station = stations.fetch(1)

print("\nEstación meteorológica seleccionada:")
print(station)

# Descargar datos horarios
data_weather = Hourly(station.index[0], start, end)
data_weather = data_weather.fetch()

# Reset index para merge
data_weather = data_weather.reset_index()

# Redondear timestamp a hora
data_weather['time'] = data_weather['time'].dt.floor('h')

# Redondear dataset bicicletas a hora
df['hour'] = df['Time stamp'].dt.floor('h')

# Merge clima + dataset bicicletas
df = pd.merge(df, data_weather[['time', 'temp', 'prcp', 'wspd']], how='left',
              left_on='hour', right_on='time')

# Renombrar columnas
df.rename(columns={'temp': 'temp_c', 'prcp': 'precip_mm', 'wspd': 'wind_speed_kmh'}, inplace=True)

# Eliminar columnas auxiliares
df.drop(['hour', 'time'], axis=1, inplace=True)

print(df.head())



Estación meteorológica seleccionada:
                                name country region    wmo  icao  latitude  \
id                                                                           
60030  Gran Canaria - Canary Islands      ES     CN  60030  GCLP   27.9333   

       longitude  elevation       timezone hourly_start hourly_end  \
id                                                                   
60030   -15.3833       23.0  Europe/Madrid   1950-03-16 2025-05-12   

      daily_start  daily_end monthly_start monthly_end      distance  
id                                                                    
60030  1957-01-20 2025-05-09    1957-01-01  2022-01-01  21779.668274  




   Place number          Time stamp  Free bikes  hour_of_day  day_of_week  \
0          3400 2023-12-01 00:06:00           0            0            4   
1          3417 2023-12-01 00:08:00           7            0            4   
2          3472 2023-12-01 00:08:00           0            0            4   
3          3416 2023-12-01 00:12:00           9            0            4   
4          3412 2023-12-01 00:18:00           5            0            4   

   is_weekend  month  week_of_year  temp_c  precip_mm  wind_speed_kmh  
0           0     12            48    20.1        0.0            25.9  
1           0     12            48    20.1        0.0            25.9  
2           0     12            48    20.1        0.0            25.9  
3           0     12            48    20.1        0.0            25.9  
4           0     12            48    20.1        0.0            25.9  


<h3>Añadir todo al dataset CSV</h3>

El dataset resultante se usará en el proyecto.

In [6]:
import pandas as pd
import holidays
from meteostat import Hourly, Stations
from datetime import datetime

# ============================
# 📥 Cargar dataset base
# ============================
df = pd.read_csv('/content/sitycleta_2024_2025_combined.csv')
df['Time stamp'] = pd.to_datetime(df['Time stamp'])

# ============================
# 📅 Añadir variables calendario
# ============================
df['hour_of_day'] = df['Time stamp'].dt.hour
df['day_of_week'] = df['Time stamp'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].isin([5,6]).astype(int)
df['month'] = df['Time stamp'].dt.month
df['week_of_year'] = df['Time stamp'].dt.isocalendar().week

# ============================
# 🎉 Añadir festivos (Canarias)
# ============================
canarias_holidays = holidays.Spain(subdiv='CN', years=[2024, 2025])
df['is_holiday'] = df['Time stamp'].dt.date.isin(canarias_holidays).astype(int)

# Mostrar festivos encontrados
translations = {
    'New Year\'s Day': 'Año Nuevo',
    'Epiphany': 'Epifanía del Señor',
    'Good Friday': 'Viernes Santo',
    'Labour Day': 'Día del Trabajador',
    'Assumption of Mary': 'Asunción de la Virgen',
    'All Saints\' Day': 'Todos los Santos',
    'Immaculate Conception': 'Inmaculada Concepción',
    'Christmas Day': 'Navidad',
    'Maundy Thursday': 'Jueves Santo',
    'Day of the Canary Islands': 'Día de Canarias',
    'National Day': 'Fiesta Nacional de España',
    'Constitution Day': 'Día de la Constitución'
}

unique_dates = sorted(set(df['Time stamp'].dt.date.unique()) & set(canarias_holidays.keys()))
print("\nDías festivos de Canarias encontrados en tu dataset:")
for date in unique_dates:
    original_name = canarias_holidays.get(date)
    name_es = translations.get(original_name, original_name)
    print(f"{date}: {name_es}")

# ============================
# 🌦️ Añadir datos meteorológicos (Meteostat)
# ============================
start = datetime(2023, 12, 1)
end = datetime(2025, 4, 30)

stations = Stations()
stations = stations.nearby(27.9333, -15.3833)  # Gran Canaria
station = stations.fetch(1)

print("\nEstación meteorológica seleccionada:")
print(station)

data_weather = Hourly(station.index[0], start, end)
data_weather = data_weather.fetch().reset_index()
data_weather['time'] = data_weather['time'].dt.floor('h')
df['hour'] = df['Time stamp'].dt.floor('h')

df = pd.merge(df, data_weather[['time', 'temp', 'prcp', 'wspd']],
              how='left', left_on='hour', right_on='time')
df.rename(columns={'temp': 'temp_c', 'prcp': 'precip_mm', 'wspd': 'wind_speed_kmh'}, inplace=True)
df.drop(['hour', 'time'], axis=1, inplace=True)

# ============================
# 💾 Guardar dataset final
# ============================
output_path = '/content/sitycleta_dataset_enriched.csv'
df.to_csv(output_path, index=False)
print(f"\n✅ Dataset enriquecido guardado en: {output_path}")
print(f"Registros totales: {df.shape[0]}")


Días festivos de Canarias encontrados en tu dataset:
2024-01-01: Año Nuevo
2024-01-06: Epifanía del Señor
2024-03-28: Jueves Santo
2024-03-29: Viernes Santo
2024-05-01: Labor Day
2024-05-30: Día de Canarias
2024-08-15: Assumption Day
2024-10-12: Fiesta Nacional de España
2024-11-01: Todos los Santos
2024-12-06: Día de la Constitución
2024-12-25: Navidad
2025-01-01: Año Nuevo
2025-01-06: Epifanía del Señor
2025-04-17: Jueves Santo
2025-04-18: Viernes Santo

Estación meteorológica seleccionada:
                                name country region    wmo  icao  latitude  \
id                                                                           
60030  Gran Canaria - Canary Islands      ES     CN  60030  GCLP   27.9333   

       longitude  elevation       timezone hourly_start hourly_end  \
id                                                                   
60030   -15.3833       23.0  Europe/Madrid   1950-03-16 2025-05-12   

      daily_start  daily_end monthly_start monthly_end 