# Imports

In [None]:
import pandas as pd 
import json
import re
from ast import literal_eval
import requests
import datetime
import openmeteo_requests
import requests_cache
from retry_requests import retry
import time

# Adding weather information by date

## Constants

In [None]:
# Endpoint handed to the Open-Meteo client when the hourly weather is requested.
BASE_URL_CLIMA = "https://archive-api.open-meteo.com/v1/archive"
# Endpoint that receives the POSTed list of IPs; the query string names the
# fields read back from each entry (query, status, lat, lon).
BASE_URL_IP = "http://ip-api.com/batch?fields=query,status,lat,lon"
# CSV written after the lat_long column has been merged into the history.
PATH_IPS_DF = "spotify_history/ips_address_df.csv"
# CSV holding only the downloaded hourly weather table.
PATH_WEATHER_DF = "spotify_history/just_weather.csv"

## Methods

### Address/IP

In [None]:
def get_address_by_list_ips(list_ips):
    """Resolve one batch of IP addresses to rounded "lat,lon" strings.

    The batch is sent as the JSON body of a single POST to ``BASE_URL_IP``.

    Args:
        list_ips: List of IP addresses to look up in one request.

    Returns:
        A list of single-entry dicts ``{ip: "lat,lon"}`` with both
        coordinates formatted to one decimal place. The value is ``None``
        when the lookup of that IP was not successful, or for every IP of
        the batch when the request itself failed. An empty batch returns an
        empty list without contacting the service.
    """
    if len(list_ips) == 0:
        return []

    try:
        # A timeout keeps a stalled connection from blocking the notebook.
        response = requests.post(BASE_URL_IP, json=list_ips, timeout=30)
        response.raise_for_status()
        results = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Erro na requisição: {e}")
        return [{ip: None} for ip in list_ips]

    addresses = []
    for info in results:
        ip = info.get('query')
        lat, lon = info.get('lat'), info.get('lon')
        located = info.get('status') == 'success' and lat is not None and lon is not None
        if located:
            addresses.append({ip: f"{float(lat):.1f},{float(lon):.1f}"})
        else:
            print(f"Erro ao localizar o IP: {ip}")
            # Keep the IP in the result so both failure paths look the same
            # to the caller (an explicit None instead of a missing entry).
            addresses.append({ip: None})

    return addresses

In [None]:
def set_address_by_ip(df):
    """Look up the location of every distinct IP found in ``df.ip_addr``.

    The distinct IPs are sent to ``get_address_by_list_ips`` in batches of at
    most 99, pausing for one minute after every 14 requests.
    NOTE(review): both numbers look chosen to stay under the lookup
    service's limits - confirm against its documentation.

    Args:
        df: Object exposing an ``ip_addr`` iterable (the history DataFrame).

    Returns:
        The concatenated list of single-entry dicts produced by
        ``get_address_by_list_ips`` for all batches, including the last,
        possibly shorter, one.
    """
    batch_size = 99
    requests_per_window = 14

    # Missing values cannot be geolocated, so they are not sent at all.
    unique_ips = [ip for ip in set(df.ip_addr) if pd.notna(ip)]

    all_address = []
    count_request = 0
    # Slicing by range also covers the final partial batch, which the
    # previous "send only when 99 are collected" loop silently dropped.
    for start in range(0, len(unique_ips), batch_size):
        if count_request >= requests_per_window:
            print("Esperando 1 minuto para mais requisições")
            time.sleep(60)
            count_request = 0
        batch_ips = unique_ips[start:start + batch_size]
        all_address.extend(get_address_by_list_ips(batch_ips))
        count_request += 1

    return all_address
    

### Weather

In [None]:
def get_weather_history(batch_lat, batch_lon, data_inicio, data_fim):
    """Download hourly temperature and precipitation for several locations.

    Args:
        batch_lat: Latitude, or list of latitudes, to request.
        batch_lon: Longitude, or list of longitudes, paired with ``batch_lat``.
        data_inicio: First day of the period, passed as ``start_date``.
        data_fim: Last day of the period, passed as ``end_date``.

    Returns:
        A DataFrame with the columns ``date`` (UTC timestamps),
        ``temperature``, ``precipitation`` and ``lat_long``, holding the rows
        of every location one after another. It is empty, with the same
        columns, when the API returns no response.
    """
    # Open-Meteo client with an on-disk cache and retry on error.
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    # The order of the "hourly" variables matters: they are read back by
    # position (0 = temperature, 1 = precipitation) below.
    params = {
        "latitude": batch_lat,
        "longitude": batch_lon,
        "start_date": data_inicio,
        "end_date": data_fim,
        "hourly": ["temperature_2m", "precipitation"],
    }

    responses = openmeteo.weather_api(BASE_URL_CLIMA, params=params)

    requested_lat = list(batch_lat) if isinstance(batch_lat, (list, tuple)) else [batch_lat]
    requested_lon = list(batch_lon) if isinstance(batch_lon, (list, tuple)) else [batch_lon]
    # Label rows with the REQUESTED coordinates so the "lat,lon" key matches
    # the one stored next to each IP. NOTE(review): this assumes the API may
    # report different (grid-snapped) coordinates and that responses come
    # back in request order - confirm against the Open-Meteo docs. When the
    # counts do not line up, fall back to the coordinates the API reports.
    use_requested = len(requested_lat) == len(requested_lon) == len(responses)

    all_responses = []
    for position, response in enumerate(responses):
        hourly = response.Hourly()

        timestamps = pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
            end =  pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left"
        )

        if use_requested:
            lat, lon = requested_lat[position], requested_lon[position]
        else:
            lat, lon = response.Latitude(), response.Longitude()

        all_responses.append(pd.DataFrame(data = {
            "date": timestamps,
            "temperature": hourly.Variables(0).ValuesAsNumpy(),
            "precipitation": hourly.Variables(1).ValuesAsNumpy(),
            "lat_long": f"{float(lat):.1f},{float(lon):.1f}",
        }))

    if not all_responses:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame(columns=["date", "temperature", "precipitation", "lat_long"])

    return pd.concat(all_responses)


In [None]:
def get_all_weather(df):
    """Download the hourly weather for every location present in ``df``.

    The requested period runs from the earliest to the latest day found in
    the ``ts`` column, regardless of row order. Locations come from the
    distinct ``lat_long`` values; missing ones are replaced by São Paulo.
    Requests are made through ``get_weather_history`` in batches of 50
    locations, pausing one minute after every 20 requests.

    Args:
        df: DataFrame with a ``ts`` column of ISO-like strings
            ("YYYY-MM-DDT...") and a ``lat_long`` column of "lat,lon"
            strings.

    Returns:
        The concatenation of the DataFrames returned by
        ``get_weather_history`` for every batch.

    Raises:
        ValueError: If ``df`` has no ``ts`` value to derive the period from.
    """
    timestamps = df["ts"].dropna()
    if timestamps.empty:
        raise ValueError("df has no 'ts' values to derive a date range from")

    # ISO-formatted strings sort chronologically, so min/max give the period
    # even when the rows are not ordered by time.
    data_primeira_musica = timestamps.min().split("T")[0]
    data_ultima_musica = timestamps.max().split("T")[0]

    sao_paulo = (-23.5471, -46.6372)

    # A dict keeps insertion order and removes duplicates, so the fallback
    # location is requested once no matter how many missing values exist.
    coordinates = {}
    for value in df["lat_long"].unique():
        if pd.isna(value) or str(value).upper() == "NAN":
            coordinates[sao_paulo] = None
        else:
            parts = value.split(",")
            coordinates[(float(parts[0]), float(parts[1]))] = None

    latitudes = [lat for lat, _ in coordinates]
    longitudes = [lon for _, lon in coordinates]

    batch_size = 50
    requests_per_window = 20
    all_weather = []

    for request_number, start in enumerate(range(0, len(latitudes), batch_size)):
        # Pause only when another request is actually about to be made.
        if request_number and request_number % requests_per_window == 0:
            print("Waiting 1 minute for more api calls")
            time.sleep(60)
        batch_lat = latitudes[start:start + batch_size]
        batch_lon = longitudes[start:start + batch_size]
        all_weather.append(
            get_weather_history(batch_lat, batch_lon, data_primeira_musica, data_ultima_musica)
        )

    return pd.concat(all_weather)

In [None]:
def set_weather(row, weather_df:pd.DataFrame):
    """Attach the temperature and precipitation of the matching hour to a row.

    The match is made on the row's ``lat_long`` plus the date
    ("YYYY-MM-DD") and hour taken from its ``ts``.

    Args:
        row: Mapping-like record (for example a DataFrame row) with the keys
            ``ts`` (ISO timestamp string) and ``lat_long``. It is modified in
            place.
        weather_df: Table with the columns ``lat_long``, ``date``, ``hour``,
            ``temperature`` and ``precipitation``.

    Returns:
        The same ``row`` with ``temperature`` and ``precipitation`` set to the
        first matching weather record, or to a float NaN when none matches.
    """
    # fromisoformat only accepts a trailing "Z" from Python 3.11 on; the
    # explicit offset is understood by every version.
    datetime_row = datetime.datetime.fromisoformat(row["ts"].replace("Z", "+00:00"))
    date_row = datetime_row.strftime('%Y-%m-%d')
    hour_row = datetime_row.hour

    # NOTE(review): a missing lat_long never matches here (NaN compares
    # unequal to everything), so such rows always end up without weather.
    matches = weather_df[
        (weather_df["lat_long"] == row["lat_long"])
        & (weather_df["date"] == date_row)
        & (weather_df["hour"] == hour_row)
    ]

    if matches.empty:
        # A real NaN keeps the resulting columns numeric; the former "NaN"
        # string turned them into mixed-type object columns.
        row["temperature"] = float("nan")
        row["precipitation"] = float("nan")
    else:
        row["temperature"] = matches["temperature"].iloc[0]
        row["precipitation"] = matches["precipitation"].iloc[0]

    return row

## Creating address df

In [None]:
# Load the listening history that the following cells enrich.
final_df = pd.read_csv("final_df.csv")

In [None]:
# Work on a copy so final_df itself stays untouched, then resolve every
# distinct IP of the history to a "lat,lon" string.
address_df = final_df.copy()
all_address = set_address_by_ip(address_df)


In [None]:
# Flatten the list of {ip: "lat,lon"} dicts into a two-column lookup table.
df = pd.DataFrame(
    [{'ip': [*d][0], 'lat_long': [*d.values()][0]} for d in all_address]
)
# Left join keeps every history row; the helper "ip" column is dropped again.
ips_address_df = pd.merge(
    address_df,
    df,
    how='left',
    left_on='ip_addr',
    right_on='ip',
).drop(columns="ip")
ips_address_df


In [None]:
# Persist the history with its lat_long column. The index is written as well
# (pandas default); it is the "Unnamed: 0" column dropped when reloading.
ips_address_df.to_csv(PATH_IPS_DF)

## Creating weather df

In [None]:
# Reload the history saved with lat_long, discarding the index column that
# to_csv stored as "Unnamed: 0".
weather_df = pd.read_csv(PATH_IPS_DF).drop(columns="Unnamed: 0")
display(weather_df)

In [None]:
# Download the hourly weather for every location found in the history.
just_weather_df = get_all_weather(weather_df)

In [None]:
# Split each weather timestamp into a "YYYY-MM-DD" date string and an hour,
# the two keys used later to match a listening record to its weather.
all_datetimes = just_weather_df["date"]
all_dates = [moment.strftime('%Y-%m-%d') for moment in all_datetimes]
all_hours = [moment.hour for moment in all_datetimes]

just_weather_df["date"], just_weather_df["hour"] = all_dates, all_hours
just_weather_df

In [None]:
# Persist the weather table (index included, pandas default) and show it.
just_weather_df.to_csv(PATH_WEATHER_DF)
just_weather_df

## Creating final df with weather

In [None]:
# Reload the weather table from disk; "date" comes back as plain strings.
just_weather_df = pd.read_csv(PATH_WEATHER_DF)

# Add temperature/precipitation to every history row. set_weather filters the
# whole weather table once per row, so this step scales with rows x weather.
final_weather_df = weather_df.apply(set_weather, axis=1, weather_df=just_weather_df)
final_weather_df

In [None]:
# NOTE(review): this overwrites "final_df.csv", the same file the notebook
# reads as its input, and writes the index too, so a re-run starts from the
# already-enriched table with an extra "Unnamed: 0" column - confirm intended.
final_weather_df.to_csv("final_df.csv")