In [1]:
import os
from google.cloud import bigquery
from datetime import datetime
import pandas_gbq
import pandas as pd
import random
import numpy as np

In [2]:
# Service-account key file exposed through GOOGLE_APPLICATION_CREDENTIALS,
# the environment variable Google Cloud client libraries consult for credentials.
SERVICE_ACCOUNT_KEY_PATH = "/Users/Alfred/Documents/Trabajo/Personal/los-outliers/Data/climate-prediction-389419-10c7097dc497.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_PATH


In [3]:

def upload_dataframe_to_bigquery(dataframe, full_table_id,
                                 project_id='climate-prediction-389419',
                                 if_exists='append'):
    """Write a DataFrame to a BigQuery table through ``pandas_gbq.to_gbq``.

    Args:
        dataframe: pandas DataFrame whose rows are written.
        full_table_id: Destination table id, passed to ``to_gbq`` unchanged.
        project_id: Google Cloud project id forwarded to ``to_gbq``.
            Defaults to the project this notebook targets.
        if_exists: Forwarded to ``to_gbq``. The default ``'append'`` adds the
            rows to an existing table, so calling this twice with the same
            frame stores the rows twice.

    Returns:
        None.
    """
    # to_gbq is handed everything it needs (frame, table, project), so no
    # separate bigquery.Client() is constructed here.
    pandas_gbq.to_gbq(
        dataframe,
        full_table_id,
        project_id=project_id,
        if_exists=if_exists,
    )


In [4]:
# clean data function
def clean_data(file_path, seed=None):
    """Load the disasters CSV and return a cleaned frame with placeholder features.

    Steps:
      1. Keep only Year, Start Month, Country, Location and Disaster Type.
      2. Rename 'Start Month' to 'Month'.
      3. Drop rows whose Location or Month is missing.
      4. Cast Year to int.
      5. Add three synthetic columns filled with random values:
         Precipitation_MM (integers 0..100), Temperature (integers -10..40)
         and Disaster_Probability (floats in [0, 1)).
      6. Replace spaces in column names with underscores.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        seed: Optional seed for the random placeholder columns. ``None``
            (the default) gives different values on every call.

    Returns:
        A new DataFrame with columns Year, Month, Country, Location,
        Disaster_Type, Precipitation_MM, Temperature, Disaster_Probability.
        The row index of the source file is kept (not reset).

    Raises:
        KeyError: if one of the five expected source columns is absent.
        ValueError: if 'Year' cannot be cast to int (e.g. it has missing values).
    """
    raw = pd.read_csv(file_path)

    # Select, rename and filter first so the random columns are generated only
    # for rows that survive; .copy() makes the later assignments write to an
    # independent frame rather than to a derived slice.
    cleaned = (
        raw[['Year', 'Start Month', 'Country', 'Location', 'Disaster Type']]
        .rename(columns={'Start Month': 'Month'})
        .dropna(subset=['Location', 'Month'])
        .copy()
    )

    # Convert 'Year' to int
    cleaned['Year'] = cleaned['Year'].astype(int)

    # One vectorised draw per column instead of a Python call per row.
    rng = np.random.default_rng(seed)
    n_rows = len(cleaned)

    # Precipitation (MM): random integers between 0 and 100 inclusive
    # (Generator.integers excludes the upper bound, hence 101).
    cleaned['Precipitation_MM'] = rng.integers(0, 101, size=n_rows)

    # Temperature: random integers between -10 and 40 inclusive
    cleaned['Temperature'] = rng.integers(-10, 41, size=n_rows)

    # Disaster probability (or prediction): random floats in [0, 1)
    cleaned['Disaster_Probability'] = rng.random(n_rows)

    # Replace spaces in column names with underscores
    cleaned.columns = cleaned.columns.str.replace(' ', '_')

    return cleaned


In [37]:
# Run the cleaning step on the raw 1900-2021 disasters CSV.
raw_disasters_csv = '/Users/Alfred/Documents/Trabajo/Personal/los-outliers/Data/1900_2021_DISASTERS.xlsx - emdat data.csv'
df = clean_data(raw_disasters_csv)

KeyboardInterrupt: 

In [31]:
# Preview up to the first 30 rows of the cleaned frame.
df.head(n=30)

Unnamed: 0,Year,Start_Month,Country,Location,Disaster_Type,Precipitation_MM,Temperature,Disaster_Probability,Ciudad_Pais
0,1900,,Cabo Verde,Countrywide,Drought,55,8,0.035097,"Countrywide, Cabo Verde"
1,1900,,India,Bengal,Drought,61,38,0.688285,"Bengal, India"
2,1902,4.0,Guatemala,"Quezaltenango, San Marcos",Earthquake,73,16,0.05336,"Quezaltenango, San Marcos, Guatemala"
5,1903,4.0,Canada,"Frank, Alberta",Mass movement (dry),83,-2,0.242061,"Frank, Alberta, Canada"
7,1904,11.0,Bangladesh,Chittagong,Storm,5,12,0.609005,"Chittagong, Bangladesh"
8,1905,8.0,Canada,"Spence's Bridge, British Columbia",Mass movement (dry),32,19,0.162969,"Spence's Bridge, British Columbia, Canada"
9,1905,4.0,India,Kangra,Earthquake,29,13,0.582946,"Kangra, India"
10,1906,8.0,Chile,Valparaiso,Earthquake,36,30,0.549397,"Valparaiso, Chile"
11,1906,1.0,Colombia,Tumako,Earthquake,42,22,0.816978,"Tumako, Colombia"
12,1906,5.0,Belgium,Louvain region,Flood,53,9,0.655348,"Louvain region, Belgium"


In [23]:
# Load the extended disasters CSV and shorten the two column names that carry
# parenthesised suffixes.
extended_df = pd.read_csv(
    '/Users/Alfred/Documents/Trabajo/Personal/los-outliers/Data/extendido_1900_2021_DISASTERS.xlsx - emdat data.csv'
).rename(
    columns={
        # precipitation column -> Precipitation_MM
        'Precipitation_(MM)': 'Precipitation_MM',
        # probability/prediction column -> Disaster_Probability
        'Disaster_Probability_(or_prediction)': 'Disaster_Probability',
    }
)


In [24]:
def _label_by_range(values, lower, upper, labels):
    """Label numeric values as below, inside, or above the closed range [lower, upper].

    Args:
        values: Numeric pandas Series.
        lower: Values strictly below this get ``labels[0]``.
        upper: Values from ``lower`` up to and including ``upper`` get
            ``labels[1]``; anything larger gets ``labels[2]``.
        labels: Three strings ordered (below, inside, above).

    Returns:
        Object-dtype Series on the same index as ``values``. Missing inputs
        stay missing instead of receiving a label.
    """
    below, inside, above = labels
    # np.select takes the first matching condition, so the second test only
    # applies to values that are already >= lower.
    labelled = pd.Series(
        np.select([values < lower, values <= upper], [below, inside], default=above),
        index=values.index,
        dtype=object,
    )
    # Every comparison against NaN is False, so a missing reading would
    # otherwise fall through to the `above` label.
    return labelled.where(values.notna().to_numpy())


# Temperature under 5 is cold, between 5 and 25 is warm, and above 25 is hot
extended_df['Temperature'] = _label_by_range(
    extended_df['Temperature'], 5, 25, ('cold', 'warm', 'hot')
)

# Precipitation under 10 is low, between 10 and 30 is medium, and above 30 is high
extended_df['Precipitation_MM'] = _label_by_range(
    extended_df['Precipitation_MM'], 10, 30, ('low', 'medium', 'high')
)

In [25]:
# Show the first five rows of the extended frame.
extended_df.head(n=5)

Unnamed: 0,Year,Country,Location,Month,Disaster_Type,Precipitation_MM,Temperature,Disaster_Probability
0,1902,Guatemala,"Quezaltenango, San Marcos",4.0,Earthquake,high,hot,1
1,1902,Guatemala,,4.0,Volcanic activity,high,warm,1
2,1902,Guatemala,,10.0,Volcanic activity,high,cold,1
3,1903,Canada,"Frank, Alberta",4.0,Mass movement (dry),high,cold,1
4,1904,Bangladesh,Chittagong,11.0,Storm,high,hot,1


In [16]:
# Send extended_df to the BigQuery disasters table.
# NOTE: the helper writes with if_exists='append', so re-running this cell
# adds the same rows to the table again.
upload_dataframe_to_bigquery(
    dataframe=extended_df,
    full_table_id='climate-prediction-389419.climate_prediction.disasters',
)

In [None]:
from kats.consts import MultivariateTimeSeriesData
from kats.models.var import VARModel, VARParams
from typing import List

# define a list of variable names
# NOTE(review): this list includes the time column ("Year"), text columns
# ("Location", "Country", "Disaster_Type") and the "Precipitation_(MM)"
# spelling - confirm these match df_test and are what the VAR model should use.
value_vars: List[str] = ["Precipitation_(MM)", "Temperature", "Location", "Country", "Year", "Month", "Disaster_Type"]

# NOTE(review): df_test is not created in any cell visible here; it must
# already exist in the kernel before this cell runs.

# Per-location frames carrying the prediction column; they are joined onto
# df_test once, after the loop, instead of growing df_test on every iteration.
predicted_frames = []

# Iterate over the distinct "Location" values in df_test.
# NaN is skipped: the equality filter below can never match it, so it would
# only produce an empty frame.
for location in df_test['Location'].dropna().unique():
    # Filter df_test by location; copy so the column added below is written to
    # an independent frame rather than to a slice of df_test.
    df_location = df_test[df_test['Location'] == location].copy()

    # create a MultivariateTimeSeriesData object
    multivariate_time_series = MultivariateTimeSeriesData(
        df=df_location,
        time_col_name="Year",
        value_col_names=value_vars
    )

    # Build a VARModel on the time series
    params = VARParams()
    m = VARModel(data=multivariate_time_series, params=params)

    # Train the model
    m.fit()

    # Forecast one step ahead
    prediction = m.predict(steps=1)

    # Add the "Disaster_Probability_(or_prediction)" column to df_location
    # assuming that Disaster_Probability is the first variable in the prediction
    # NOTE(review): confirm that the object returned by predict() exposes `.y`
    # and that index 0 is the intended variable.
    df_location['Disaster_Probability_(or_prediction)'] = prediction.y[0]

    predicted_frames.append(df_location)

# Add every df_location to df_test in one concat
# (DataFrame.append was removed in pandas 2.0).
df_test = pd.concat([df_test, *predicted_frames])
