<a href="https://colab.research.google.com/github/CristiSavca/DataScience-MicroCredential/blob/main/Climate_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Sources cited: https://www.kaggle.com/code/amelinvladislav/map-of-temperatures-and-analysis-of-global-warming

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import copy
import plotly.express as px
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns

import tensorflow as tf
from tensorflow import keras



df = pd.read_csv('https://raw.githubusercontent.com/CristiSavca/ML_Data/main/GlobalLandTemperaturesByCountry%20-%20GlobalLandTemperaturesByCountry.csv')

In [2]:
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


In [3]:
len(df)

577462

In [4]:
df.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country'], dtype='object')

# EDA

In [5]:
df['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecu

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 577462 entries, 0 to 577461
Data columns (total 4 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   dt                             577462 non-null  object 
 1   AverageTemperature             544811 non-null  float64
 2   AverageTemperatureUncertainty  545550 non-null  float64
 3   Country                        577462 non-null  object 
dtypes: float64(2), object(2)
memory usage: 17.6+ MB


In [7]:
df.describe()

Unnamed: 0,AverageTemperature,AverageTemperatureUncertainty
count,544811.0,545550.0
mean,17.193354,1.019057
std,10.953966,1.20193
min,-37.658,0.052
25%,10.025,0.323
50%,20.901,0.571
75%,25.814,1.206
max,38.842,15.003


In [8]:
country_group_df = df.groupby(by = 'Country').count().reset_index('Country').rename(columns={'AverageTemperature':'AverageTemperatureCount', 'AverageTemperatureUncertainty':'AverageTemperatureUncertaintyCount'})

In [9]:
country_group_df.head()

Unnamed: 0,Country,dt,AverageTemperatureCount,AverageTemperatureUncertaintyCount
0,Afghanistan,2106,2085,2085
1,Africa,1965,1894,1894
2,Albania,3239,3166,3166
3,Algeria,2721,2702,2702
4,American Samoa,1761,1629,1629


In [10]:
country_group_df['Country']

0         Afghanistan
1              Africa
2             Albania
3             Algeria
4      American Samoa
            ...      
238    Western Sahara
239             Yemen
240            Zambia
241          Zimbabwe
242             Åland
Name: Country, Length: 243, dtype: object

In [11]:
fig = px.bar(country_group_df, x = 'Country', y = 'AverageTemperatureCount')
fig.show()

In [13]:
fig = px.histogram(country_group_df, x = 'AverageTemperatureCount')
fig.show()

In [14]:
country_group_df[(country_group_df['AverageTemperatureCount'] < 1500) | (country_group_df['AverageTemperatureUncertaintyCount'] < 1500)]

Unnamed: 0,Country,dt,AverageTemperatureCount,AverageTemperatureUncertaintyCount
8,Antarctica,764,0,739
73,Federated States Of Micronesia,1427,1364,1364
80,French Southern And Antarctic Lands,788,783,783
91,Guam,1329,1328,1328
98,Heard Island And Mcdonald Islands,788,783,783
161,Northern Mariana Islands,1329,1328,1328
204,South Georgia And The South Sandwich Isla,1666,1474,1474


In [18]:
countries_with_less_data = country_group_df[(country_group_df['AverageTemperatureCount'] < 1500) | (country_group_df['AverageTemperatureUncertaintyCount'] < 1500)]['Country'].tolist()

In [19]:
countries_with_less_data

['Antarctica',
 'Federated States Of Micronesia',
 'French Southern And Antarctic Lands',
 'Guam',
 'Heard Island And Mcdonald Islands',
 'Northern Mariana Islands',
 'South Georgia And The South Sandwich Isla']

In [20]:
df['Country'].isin(countries_with_less_data)

0         False
1         False
2         False
3         False
4         False
          ...  
577457    False
577458    False
577459    False
577460    False
577461    False
Name: Country, Length: 577462, dtype: bool

In [21]:
df = df[~df['Country'].isin(countries_with_less_data)]

In [22]:
df

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland
...,...,...,...,...
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.000,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe


In [23]:
df['AverageTemperature'] = df['AverageTemperature'].fillna(df['AverageTemperature'].rolling(730, min_periods = 1).mean())

In [24]:
df.isna().sum()

dt                                   0
AverageTemperature                   0
AverageTemperatureUncertainty    31620
Country                              0
dtype: int64

In [27]:
df['AverageTemperatureUncertainty'] = df['AverageTemperatureUncertainty'].fillna(df['AverageTemperatureUncertainty'].rolling(730, min_periods = 1).mean())

In [28]:
df.isna().sum()

dt                               0
AverageTemperature               0
AverageTemperatureUncertainty    0
Country                          0
dtype: int64

In [29]:
df['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros',
       'Congo (Democratic Republic Of The)', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark (Europe)', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt'

In [30]:
duplicates = []
for i in df['Country'].unique():
  if '(' in i:
    duplicates.append(i)

In [31]:
duplicates

['Congo (Democratic Republic Of The)',
 'Denmark (Europe)',
 'Falkland Islands (Islas Malvinas)',
 'France (Europe)',
 'Netherlands (Europe)',
 'United Kingdom (Europe)']

In [33]:
# replace duplicates
df = df.replace(duplicates, ['Congo',
 'Denmark',
 'Falkland Islands',
 'France',
 'Netherlands',
 'United Kingdom'])

In [34]:
df['Country'].unique()

array(['Åland', 'Afghanistan', 'Africa', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua And Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Asia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Baker Island', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius And Saba', 'Bosnia And Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Bulgaria',
       'Burkina Faso', 'Burma', 'Burundi', "Côte D'Ivoire", 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros', 'Congo', 'Costa Rica',
       'Croatia', 'Cuba', 'Curaçao', 'Cyprus', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
     

In [36]:
countries = df['Country'].unique().tolist()

In [37]:
countries

['Åland',
 'Afghanistan',
 'Africa',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua And Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Asia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Baker Island',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bonaire, Saint Eustatius And Saba',
 'Bosnia And Herzegovina',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Bulgaria',
 'Burkina Faso',
 'Burma',
 'Burundi',
 "Côte D'Ivoire",
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Christmas Island',
 'Colombia',
 'Comoros',
 'Congo',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Curaçao',
 'Cyprus',
 'Czech Republic',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Ethiopia',
 'Europe',
 'Falkland Islands'

In [39]:
# Get the mean temperature for each company
mean_temperature = []
for i in countries:
  mean_temperature.append(df[df['Country'] == i]['AverageTemperature'].mean())

In [40]:
mean_temperature

[5.253407045650971,
 13.969032500999896,
 23.742513304859663,
 12.868059006848597,
 22.921618511348367,
 26.415697533490643,
 11.543420273585504,
 21.081856794478785,
 26.28328599097268,
 26.490144329965048,
 14.621407346072278,
 9.199305804976174,
 26.528234326993193,
 7.373470573259821,
 21.597363156755243,
 6.553616257515263,
 11.171363673433104,
 23.57770172333342,
 25.924235133962867,
 25.53470677532838,
 24.84190674998119,
 26.371287709954547,
 6.278468335904415,
 9.418498079857274,
 24.878248840314843,
 27.015074990382384,
 11.940860669269968,
 20.877428251970485,
 26.733766132051635,
 10.795574834946803,
 21.85136260006229,
 24.68476976973327,
 26.255617836936572,
 10.837889488469633,
 27.203972767337603,
 23.901255108822976,
 20.513229501055314,
 25.68400374912927,
 26.635809223381855,
 24.56038082215269,
 -1.6628978071506426,
 22.943871060047485,
 26.72250415028397,
 25.423591438743145,
 27.00171149623752,
 9.383679436640453,
 6.761933487452209,
 22.95189630219478,
 24.840096

In [41]:
# Plot mean temperature of countries

data = [ dict(type = 'choropleth', locations = countries, z = mean_temperature, locationmode = 'country names')]

layout = dict(title = 'Average Global Land Temperatures', geo = dict(showframe = False, showocean = True, oceancolor = 'aqua', projection = dict(type = 'orthographic')))

fig = dict(data = data, layout = layout)

py.iplot(fig, validate = False, filename = 'worldmap')

# Logistic Regression Model

# Evaluate LR Model