# GMSL Analysis and Prediction 

## Final Dataset for GMSL analysis and prediction

Spajanjem prethodno dobijenih skupova podataka i izdvajanjem relevantnih atributa formira se konačan skup podataka nad kojim će se izvršiti detaljna analiza i dodatna obrada podataka.

Podaci će biti grupisani na osnovu datuma merenja na mesečnom nivou (od januara 1969. do decembra 2013. godine).

Podatak na osnovu kog će se formirati klasa jeste **GMSL (global mean sea level)** koji predstavlja globalnu srednju vrednost nivoa mora na mesečnom nivou. 

Klasa će uzimati vrednost 0 ili 1:
* 0 - nije se desio porast nivoa mora u odnosu na prethodni mesec
* 1 - desio se porast nivoa mora u odnosu na prethodni mesec

Podaci koji utiču na porast nivoa mora:

* Extent - ukupna površina morskog leda izražena u jedinici 10^6 kvadratnih kilometara
* WaterTemp - temperatura vode izražena u stepenima Celzijusa
* O2ml - zasićenost vode kiseonikom
* LandAverageTemperature - prosečna temperatura kopna za dan merenja
* LandAndOceanAverageTemperature - prosečna temperatura kopna i mora za dan merenja
* CO2 - prosečna koncentracija CO2 na mesečnom nivou

In [182]:
import pandas as pd
import seaborn as sb

In [183]:
# Load the GMSL dataset from its comma-separated file.
df_GMSL = pd.read_csv('processed_datasets/GMSL.csv', sep=',')

In [184]:
# Preview the first and last rows of df_GMSL. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_GMSL.head(), df_GMSL.tail()])

Unnamed: 0,Date,GMSL
0,1969-01,-35.8
1,1969-02,-37.2
2,1969-03,-36.2
3,1969-04,-37.4
4,1969-05,-38.6
535,2013-08,71.6
536,2013-09,68.8
537,2013-10,66.4
538,2013-11,59.7
539,2013-12,58.5


In [185]:
# Build the binary class label for every row:
#   1 - GMSL is strictly greater than the value in the previous row
#   0 - otherwise
# shift() moves each value down by one row, so every row is compared with
# its predecessor. The first row has no predecessor (NaN), the comparison
# is False there, and it is therefore labelled 0.
gmsl_values = df_GMSL['GMSL']
GMSL_rise_arr = gmsl_values.gt(gmsl_values.shift()).astype(int).tolist()

# Insert the label at column position 2; the trailing True is
# allow_duplicates.
df_GMSL.insert(2, "IsGMSLIncreased", GMSL_rise_arr, True)

In [186]:
# Start the final dataset from an independent copy, so that in-place
# changes to df_final do not also modify df_GMSL.
df_final = df_GMSL.copy()

In [187]:
# Remove the 'GMSL' column from df_final in place.
df_final.drop(columns=['GMSL'], inplace=True)

In [188]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,IsGMSLIncreased
0,1969-01,0
1,1969-02,0
2,1969-03,1
3,1969-04,0
4,1969-05,0
535,2013-08,1
536,2013-09,0
537,2013-10,0
538,2013-11,0
539,2013-12,0


In [189]:
# Load the Extent dataset from its comma-separated file.
df_Extent = pd.read_csv('processed_datasets/Extent.csv', sep=',')

In [190]:
# Preview the first and last rows of df_Extent. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_Extent.head(), df_Extent.tail()])

Unnamed: 0,Date,Extent
0,1969-01,11.412998
1,1969-02,11.41918
2,1969-03,11.42531
3,1969-04,11.431388
4,1969-05,11.437415
535,2013-08,12.338645
536,2013-09,12.298467
537,2013-10,13.236306
538,2013-11,13.4056
539,2013-12,13.066


In [191]:
# Join the Extent columns onto df_final using the shared 'Date' column
# (pandas' default join type, inner, applies).
df_final = pd.merge(df_final, df_Extent, on='Date')

In [192]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,IsGMSLIncreased,Extent
0,1969-01,0,11.412998
1,1969-02,0,11.41918
2,1969-03,1,11.42531
3,1969-04,0,11.431388
4,1969-05,0,11.437415
535,2013-08,1,12.338645
536,2013-09,0,12.298467
537,2013-10,0,13.236306
538,2013-11,0,13.4056
539,2013-12,0,13.066


In [193]:
# Load the WaterTemp/O2ml dataset from its comma-separated file.
df_Water = pd.read_csv('processed_datasets/WaterTemp_O2ml.csv', sep=',')

In [194]:
# Preview the first and last rows of df_Water. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_Water.head(), df_Water.tail()])

Unnamed: 0,Date,WaterTemp,O2ml
0,1969-01,13.265,5.595
1,1969-02,12.435,5.695
2,1969-03,15.535,5.49
3,1969-04,12.07,5.3125
4,1969-05,12.12,5.945
535,2013-08,12.053062,5.263625
536,2013-09,12.200875,5.30275
537,2013-10,12.348688,5.341875
538,2013-11,12.4965,5.381
539,2013-12,13.06225,5.42725


In [195]:
# Join the water columns onto df_final using the shared 'Date' column
# (pandas' default join type, inner, applies).
df_final = pd.merge(df_final, df_Water, on='Date')

In [196]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,IsGMSLIncreased,Extent,WaterTemp,O2ml
0,1969-01,0,11.412998,13.265,5.595
1,1969-02,0,11.41918,12.435,5.695
2,1969-03,1,11.42531,15.535,5.49
3,1969-04,0,11.431388,12.07,5.3125
4,1969-05,0,11.437415,12.12,5.945
535,2013-08,1,12.338645,12.053062,5.263625
536,2013-09,0,12.298467,12.200875,5.30275
537,2013-10,0,13.236306,12.348688,5.341875
538,2013-11,0,13.4056,12.4965,5.381
539,2013-12,0,13.066,13.06225,5.42725


In [197]:
# Load the land / land-and-ocean temperature dataset from its
# comma-separated file.
df_Temp = pd.read_csv(
    'processed_datasets/LandAverageTemp_LandAndOceanAverageTemp.csv',
    sep=',',
)

In [198]:
# Preview the first and last rows of df_Temp. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_Temp.head(), df_Temp.tail()])

Unnamed: 0,Date,LandAverageTemperature,LandAndOceanAverageTemperature
0,1969-01,1.966,13.518
1,1969-02,2.45,13.747
2,1969-03,5.131,14.576
3,1969-04,8.576,15.518
4,1969-05,11.354,16.329
535,2013-08,14.742,17.462
536,2013-09,13.154,16.894
537,2013-10,10.256,15.905
538,2013-11,7.424,15.107
539,2013-12,4.724,14.339


In [199]:
# Join the temperature columns onto df_final using the shared 'Date' column
# (pandas' default join type, inner, applies).
df_final = pd.merge(df_final, df_Temp, on='Date')

In [200]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,IsGMSLIncreased,Extent,WaterTemp,O2ml,LandAverageTemperature,LandAndOceanAverageTemperature
0,1969-01,0,11.412998,13.265,5.595,1.966,13.518
1,1969-02,0,11.41918,12.435,5.695,2.45,13.747
2,1969-03,1,11.42531,15.535,5.49,5.131,14.576
3,1969-04,0,11.431388,12.07,5.3125,8.576,15.518
4,1969-05,0,11.437415,12.12,5.945,11.354,16.329
535,2013-08,1,12.338645,12.053062,5.263625,14.742,17.462
536,2013-09,0,12.298467,12.200875,5.30275,13.154,16.894
537,2013-10,0,13.236306,12.348688,5.341875,10.256,15.905
538,2013-11,0,13.4056,12.4965,5.381,7.424,15.107
539,2013-12,0,13.066,13.06225,5.42725,4.724,14.339


In [201]:
# Load the CO2 dataset from its comma-separated file.
df_CO2 = pd.read_csv('processed_datasets/CO2.csv', sep=',')

In [202]:
# Preview the first and last rows of df_CO2. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_CO2.head(), df_CO2.tail()])

Unnamed: 0,Date,CO2
0,1969-01,324.0
1,1969-02,324.42
2,1969-03,325.64
3,1969-04,326.66
4,1969-05,327.34
535,2013-08,395.2
536,2013-09,393.45
537,2013-10,393.7
538,2013-11,395.16
539,2013-12,396.84


In [203]:
# Join the CO2 column onto df_final using the shared 'Date' column
# (pandas' default join type, inner, applies).
df_final = pd.merge(df_final, df_CO2, on='Date')

In [204]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,IsGMSLIncreased,Extent,WaterTemp,O2ml,LandAverageTemperature,LandAndOceanAverageTemperature,CO2
0,1969-01,0,11.412998,13.265,5.595,1.966,13.518,324.0
1,1969-02,0,11.41918,12.435,5.695,2.45,13.747,324.42
2,1969-03,1,11.42531,15.535,5.49,5.131,14.576,325.64
3,1969-04,0,11.431388,12.07,5.3125,8.576,15.518,326.66
4,1969-05,0,11.437415,12.12,5.945,11.354,16.329,327.34
535,2013-08,1,12.338645,12.053062,5.263625,14.742,17.462,395.2
536,2013-09,0,12.298467,12.200875,5.30275,13.154,16.894,393.45
537,2013-10,0,13.236306,12.348688,5.341875,10.256,15.905,393.7
538,2013-11,0,13.4056,12.4965,5.381,7.424,15.107,395.16
539,2013-12,0,13.066,13.06225,5.42725,4.724,14.339,396.84


In [205]:
# Keep only the listed columns, in this order: 'Date', the feature columns,
# and the class label 'IsGMSLIncreased' last.
final_columns = [
    'Date',
    'Extent',
    'WaterTemp',
    'O2ml',
    'LandAverageTemperature',
    'LandAndOceanAverageTemperature',
    'CO2',
    'IsGMSLIncreased',
]
df_final = df_final[final_columns]

In [206]:
# Preview the first and last rows of df_final. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
pd.concat([df_final.head(), df_final.tail()])

Unnamed: 0,Date,Extent,WaterTemp,O2ml,LandAverageTemperature,LandAndOceanAverageTemperature,CO2,IsGMSLIncreased
0,1969-01,11.412998,13.265,5.595,1.966,13.518,324.0,0
1,1969-02,11.41918,12.435,5.695,2.45,13.747,324.42,0
2,1969-03,11.42531,15.535,5.49,5.131,14.576,325.64,1
3,1969-04,11.431388,12.07,5.3125,8.576,15.518,326.66,0
4,1969-05,11.437415,12.12,5.945,11.354,16.329,327.34,0
535,2013-08,12.338645,12.053062,5.263625,14.742,17.462,395.2,1
536,2013-09,12.298467,12.200875,5.30275,13.154,16.894,393.45,0
537,2013-10,13.236306,12.348688,5.341875,10.256,15.905,393.7,0
538,2013-11,13.4056,12.4965,5.381,7.424,15.107,395.16,0
539,2013-12,13.066,13.06225,5.42725,4.724,14.339,396.84,0


In [207]:
# Count how many rows fall into each class of the label column.
df_final.loc[:, 'IsGMSLIncreased'].value_counts()

1    284
0    256
Name: IsGMSLIncreased, dtype: int64

In [208]:
# Write the assembled dataset to disk without the row index.
df_final.to_csv(
    'processed_datasets/GMSL_analysis_and_prediction.csv',
    index=False,
)