# Métodos Cuantitativos y de Simulación
## Proyecto Final  
Proyecto para simular el comportamiento de la temperatura superficial de los países.  

- El data set utiliza temperaturas superficiales de la tierra de varios países, contiene datos desde 1743. [Dataset](https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data)

In [76]:
import os
import pandas as pd
import numpy as np
import json

## Limpieza de datos

- Eliminar filas vacias
- Sacar continentes

In [77]:
continents = ['North America', 'South America', 'Africa', 'Europe', 'Asia','Antarctica']

data = pd.read_csv(os.path.join("TemperatureFiles","GlobalLandTemperaturesByCountry.csv"))
data = data.dropna()

for continent in continents:
    data = data[data.Country != continent]

In [78]:
# 1743

year = 1948

data['dt'] = data['dt'].astype(str).str[:4]

data = data[data['dt'].astype(int) >= year]

data.to_csv(os.path.join('Clean','icResults.csv'), index = False)

data

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
2450,1948,-4.641,0.867,Åland
2451,1948,-3.767,0.744,Åland
2452,1948,0.743,0.693,Åland
2453,1948,3.959,0.599,Åland
2454,1948,8.036,0.501,Åland
2455,1948,12.181,0.302,Åland
2456,1948,16.355,0.466,Åland
2457,1948,14.849,0.282,Åland
2458,1948,11.981,0.149,Åland
2459,1948,6.246,0.155,Åland


## Acomodar datos por país

- Se guardan las temperaturas por año de cada país

In [79]:
"""{"Mexico":
    {
        "1743": [3.2,3.1,3.0],
        "1744": [],
    },
    "USA":
    
}"""

countries_by_temp = dict()
dc = pd.read_csv(os.path.join("Clean","icResults.csv"))

for ind in dc.index:
    country =  dc['Country'][ind]
    if country not in countries_by_temp:
        countries_by_temp[country] = dict()
    year = dc['dt'][ind]
    if year not in countries_by_temp[country]:
        countries_by_temp[country][year] = []
    countries_by_temp[country][year].append(dc['AverageTemperature'][ind])
    


## Sacar promedio por año
- Se saca la temperatura promedio de cada año de cada país

In [80]:
#countries_by_temp

avg_per_year = {}

for country in countries_by_temp:
    avg_per_year[country] = {}
    
    for year in countries_by_temp[country]:
        
        avg = sum(countries_by_temp[country][year])/len(countries_by_temp[country][year])
        avg_per_year[country][year] = avg

In [81]:
dfTemp = pd.DataFrame.from_dict(avg_per_year)
dfTemp

Unnamed: 0,Åland,Afghanistan,Albania,Algeria,American Samoa,Andorra,Angola,Anguilla,Antigua And Barbuda,Argentina,...,United States,Uruguay,Uzbekistan,Venezuela,Vietnam,Virgin Islands,Western Sahara,Yemen,Zambia,Zimbabwe
1948,6.045500,14.335250,12.753917,23.071333,26.680417,11.935583,21.896333,26.847333,26.607250,14.778333,...,8.481667,17.168250,12.822250,25.180750,23.963667,26.615333,22.552583,26.334667,21.246417,21.110500
1949,7.081083,13.350083,12.518250,23.188167,26.851000,12.317917,21.928250,26.413083,26.231833,14.771333,...,8.888083,17.289250,11.123833,24.963667,24.044333,26.170167,22.543750,26.128250,21.684583,21.535083
1950,5.957917,13.043500,13.812833,22.781833,26.520083,11.812000,21.571833,26.266417,26.052333,14.795083,...,8.656417,17.413250,11.026167,24.793500,23.944833,25.997333,22.475833,25.815667,20.847667,21.258417
1951,5.820583,13.967750,13.569833,23.321667,26.689667,10.972917,22.002167,26.695250,26.537750,14.836917,...,8.416583,17.339250,11.909083,25.358667,23.863667,26.432583,21.958833,26.388083,21.294167,21.102333
1952,4.795000,14.175417,13.573917,23.185417,27.063750,11.660833,22.187667,26.866917,26.714250,14.916333,...,8.979417,17.530667,12.312417,25.560667,24.186833,26.609083,22.330167,26.081667,21.522750,21.325500
1953,6.746917,14.650750,12.586333,22.869417,26.949250,11.471000,21.724833,27.037250,26.863500,14.974750,...,9.530417,17.579500,12.571333,25.381333,24.204500,26.724917,22.465583,26.399833,21.262750,20.757417
1954,5.551750,13.691333,12.450083,22.652417,26.866083,10.902000,21.567000,26.660667,26.461583,14.491417,...,9.410583,17.239667,11.413583,25.048917,24.029000,26.316667,21.901667,26.350083,21.299000,21.175583
1955,4.887417,14.642583,13.148083,24.004833,26.710750,11.727500,21.578250,26.725000,26.600250,14.191167,...,8.517167,16.708000,13.513750,24.919917,23.679583,26.384000,23.170500,25.956583,21.217833,20.913500
1956,4.045583,14.191083,12.172333,22.611333,26.466833,10.363250,21.521917,26.626833,26.426833,14.021417,...,8.699917,16.393083,12.247750,24.853333,23.664333,26.281417,21.892833,25.957167,20.918167,20.894250
1957,5.594583,12.777167,13.078417,22.620250,26.762917,11.144083,21.991500,26.887167,26.677750,14.656250,...,9.126333,17.376250,11.721083,25.306500,24.048917,26.626583,21.943667,25.953417,21.658083,21.821250


In [82]:
startYear = dfTemp.dropna()
startYear

Unnamed: 0,Åland,Afghanistan,Albania,Algeria,American Samoa,Andorra,Angola,Anguilla,Antigua And Barbuda,Argentina,...,United States,Uruguay,Uzbekistan,Venezuela,Vietnam,Virgin Islands,Western Sahara,Yemen,Zambia,Zimbabwe
1948,6.045500,14.335250,12.753917,23.071333,26.680417,11.935583,21.896333,26.847333,26.607250,14.778333,...,8.481667,17.168250,12.822250,25.180750,23.963667,26.615333,22.552583,26.334667,21.246417,21.110500
1949,7.081083,13.350083,12.518250,23.188167,26.851000,12.317917,21.928250,26.413083,26.231833,14.771333,...,8.888083,17.289250,11.123833,24.963667,24.044333,26.170167,22.543750,26.128250,21.684583,21.535083
1950,5.957917,13.043500,13.812833,22.781833,26.520083,11.812000,21.571833,26.266417,26.052333,14.795083,...,8.656417,17.413250,11.026167,24.793500,23.944833,25.997333,22.475833,25.815667,20.847667,21.258417
1951,5.820583,13.967750,13.569833,23.321667,26.689667,10.972917,22.002167,26.695250,26.537750,14.836917,...,8.416583,17.339250,11.909083,25.358667,23.863667,26.432583,21.958833,26.388083,21.294167,21.102333
1952,4.795000,14.175417,13.573917,23.185417,27.063750,11.660833,22.187667,26.866917,26.714250,14.916333,...,8.979417,17.530667,12.312417,25.560667,24.186833,26.609083,22.330167,26.081667,21.522750,21.325500
1953,6.746917,14.650750,12.586333,22.869417,26.949250,11.471000,21.724833,27.037250,26.863500,14.974750,...,9.530417,17.579500,12.571333,25.381333,24.204500,26.724917,22.465583,26.399833,21.262750,20.757417
1954,5.551750,13.691333,12.450083,22.652417,26.866083,10.902000,21.567000,26.660667,26.461583,14.491417,...,9.410583,17.239667,11.413583,25.048917,24.029000,26.316667,21.901667,26.350083,21.299000,21.175583
1955,4.887417,14.642583,13.148083,24.004833,26.710750,11.727500,21.578250,26.725000,26.600250,14.191167,...,8.517167,16.708000,13.513750,24.919917,23.679583,26.384000,23.170500,25.956583,21.217833,20.913500
1956,4.045583,14.191083,12.172333,22.611333,26.466833,10.363250,21.521917,26.626833,26.426833,14.021417,...,8.699917,16.393083,12.247750,24.853333,23.664333,26.281417,21.892833,25.957167,20.918167,20.894250
1957,5.594583,12.777167,13.078417,22.620250,26.762917,11.144083,21.991500,26.887167,26.677750,14.656250,...,9.126333,17.376250,11.721083,25.306500,24.048917,26.626583,21.943667,25.953417,21.658083,21.821250


## Clasificación de temperaturas
### Hay 5 temperaturas:
- <b>Muy frío</b>: abajo de 0 grados
- <b>Frío</b>: entre 0 y 10 grados
- <b>Templado</b>: entre 10 y 20 grados
- <b>Cálido</b>: entre 20 y 25 grados
- <b>Muy Cálido</b>: arriba de 25 grados  

## Creación de matrices

- Las matrices cuentan las transiciones de estado de cada país

In [83]:
"""

<0 - Muy Frio
0 < 10 - Frio
10 < 20 - Templado
20 < 25 - Calido
25 <   - Muy Calido 


http://www.met.igp.gob.pe/clima/clasif.html
"""

matrix_countries = {}

for country in countries_by_temp:
    matrix_countries[country] = np.zeros(shape=(5,5))

In [84]:
def get_temp(temp):
    if temp < 0:
        return 0
    elif temp < 10:
        return 1
    elif temp < 20:
        return 2
    elif temp < 25:
        return 3
    else:
        return 4

def add_counter(country, prev, current):
    row = get_temp(prev)
    col = get_temp(current)
    matrix_countries[country][row][col] += 1

In [85]:
for (columnName, columnData) in dfTemp.iteritems():
    noNanData = []
    for number in columnData:
        if number == number:
            noNanData.append(number)
    for i in range(len(noNanData)-2):
        prev = noNanData[i]
        current = noNanData[i+1]
        add_counter(columnName, prev, current)

In [86]:
print(matrix_countries["Chile"])

[[ 0.  0.  0.  0.  0.]
 [ 0. 39. 11.  0.  0.]
 [ 0. 10.  4.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]


## Matrices estocásticas
#### Para generar las matrices estocásticas se realizan dos pasos

- Se reducen a matrices cuadradas sin filas y columnas llenas de 0s
- Se convierten los contadores a probabilidades

In [87]:
#[2,3,4]

#{"":[[[],[]],[]]}
#{"":{"matrix":, "states":}}

def reduce_matrix(matrix_country):
    states = []
    for i in range(5):
        for j in range(5):
            if matrix_country[i][j] != 0:
                states.append(i)
                break
    #print(states)
    reduced_matrix = np.zeros(shape=(len(states),len(states)))
    row = 0
    for i in states:
        column = 0
        for j in states:
            reduced_matrix[row][column] = matrix_country[i][j]
            column += 1
        row += 1
    return [reduced_matrix, states]

In [88]:
reduced_matrices = {}

for country_matrix in matrix_countries:
    reduced_matrices[country_matrix] = {}
    temp = reduce_matrix(matrix_countries[country_matrix])
    reduced_matrices[country_matrix]["Matrix"] = temp[0]
    reduced_matrices[country_matrix]["States"] = temp[1]

In [89]:
print(reduced_matrices["Chile"])

{'Matrix': array([[39., 11.],
       [10.,  4.]]), 'States': [1, 2]}


In [90]:
def estocastica(matrix):
    for i in range(len(matrix)):
        add = sum(matrix[i])
        for j in range(len(matrix[i])):
            matrix[i][j] = matrix[i][j]/add      

In [91]:
for reduced_matrix in reduced_matrices:
    estocastica(reduced_matrices[reduced_matrix]["Matrix"])

In [92]:
for reduced_matrix in reduced_matrices:
    reduced_matrices[reduced_matrix]["Matrix"] = reduced_matrices[reduced_matrix]["Matrix"].tolist()

In [93]:
with open(os.path.join("Clean","markov.json"), 'w') as file:
    json.dump(reduced_matrices, file)

In [95]:
print(reduced_matrices["Egypt"])

{'Matrix': [[0.9841269841269841, 0.015873015873015872], [1.0, 0.0]], 'States': [3, 4]}
