In [1]:
import pandas as pd
from shapely.wkt import loads
from folium.plugins import HeatMap
from shapely.geometry import mapping
from datetime import datetime, timedelta


In [36]:

def get_portugal_holidays(start_year, end_year):
    portugal_holidays = []
    for year in range(start_year, end_year + 1):
        holidays = [
            "01/01",  # New Year's Day
            "14/04",  # Good Friday
            "16/04",  # Easter Sunday
            "25/04",  # Freedom Day
            "01/05",  # Labor Day
            "10/06",  # Portugal Day
            "15/06",  # Corpus Christi
            "15/08",  # Assumption Day
            "05/10",  # Republic Day
            "01/11",  # All Saints' Day
            "01/12",  # Restoration of Independence
            "25/12"   # Christmas Day
        ]
        holidays_with_year = [date + f"/{year}" for date in holidays]
        portugal_holidays.extend([datetime.strptime(date, "%d/%m/%Y").date() for date in holidays_with_year])

    return portugal_holidays

start_year = 2021  
end_year = 2023 

portugal_holidays = get_portugal_holidays(start_year, end_year)

In [37]:
def get_time_of_day(hour):
    if 6 < hour <= 12:
        return 'Manhã'
    elif 12 < hour <= 18:
        return 'Tarde'
    elif 18 < hour <= 24:
        return 'Noite'
    else:
        return "Madrugada"

def is_weekend(day):
    return day.weekday() >= 5  

def is_holiday(day):
    return day in portugal_holidays


In [42]:
file_path = 'January2023.csv' 
file_path_e = 'wktComplete.csv'

columns_to_keep_vodafone = ['Grid_ID','Datetime','C1', 'C3', 'C5', 'C6', 'E7', 'E8', 'E9']
columns_to_keep_wkt = ["grelha_id", 'nome','latitude','longitude']

df_dados_vodafone = pd.read_csv(file_path, encoding='latin1',usecols=columns_to_keep_vodafone)
df_wkt = pd.read_csv(file_path_e,encoding='latin1',usecols=columns_to_keep_wkt)

df_wkt = df_wkt.rename(columns={'grelha_id': 'Grid_ID'}) 

merged_df = pd.merge(df_dados_vodafone, df_wkt, on='Grid_ID')

merged_df = merged_df.drop(0)

In [43]:
merged_df

merged_df['datetime_column'] = pd.to_datetime(merged_df["Datetime"])

# Separate into date and hour columns
merged_df['date'] = merged_df['datetime_column'].dt.date
merged_df['Horas'] = merged_df['datetime_column'].dt.time




merged_df['Partes do dia'] = merged_df['datetime_column'].dt.hour.apply(get_time_of_day)
merged_df['Fim-de-semana'] = merged_df['datetime_column'].dt.date.apply(is_weekend)
merged_df['Feriado'] = merged_df['datetime_column'].dt.date.apply(is_holiday)


df = merged_df.drop(columns=['datetime_column'])


In [44]:
patterns = ['IC19','Segunda Circular', '2ª Circular','Eixo Norte-Sul']
numbers_list = [2548, 2549, 2608,2616,2617, 2670, 2731, 2792,2816,2214, 2852, 2911, 2972,3031,2973, 3032, 3091, 3092,3417, 3546, 3581, 3613, 3612, 3637, 3660, 3661, 3662, 3680, 3681, 3697, 2477, 2543, 2542, 2411, 1841, 1840, 1779, 1778, 1302, 1303, 1241, 1242, 1182, 1124, 1066, 1009, 953, 896, 839
]

combined_pattern = '|'.join(patterns)

merged_df_truncated1 = df[df['Grid_ID'].isin(numbers_list)]


merged_df_truncated2 = df[df.nome.str.contains(combined_pattern, regex=True)]

concatenated_df = pd.concat([merged_df_truncated1, merged_df_truncated2], ignore_index=True)

df_no_duplicates = concatenated_df.drop_duplicates()

columns_to_format = ['C1', 'C3', 'C5', 'C6', 'E7', 'E8', 'E9']

for col in columns_to_format:
    df_no_duplicates[col] = df_no_duplicates[col].astype(str).str.replace('.', ',')


df_no_duplicates.to_csv("January2023Clean.csv", sep=';', index=False)

  df_no_duplicates[col] = df_no_duplicates[col].astype(str).str.replace('.', ',')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_duplicates[col] = df_no_duplicates[col].astype(str).str.replace('.', ',')


In [40]:
len(df_no_duplicates)

362880

In [None]:
len()

In [None]:
del df_dados_vodafone, df_wkt, merged_df, patterns, combined_pattern