In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

## Loading Dataset

In [2]:
data = pd.read_csv("/content/Country _Temperature.csv",encoding="latin1")

In [3]:
data.head()

Unnamed: 0,Area Code,Area,Months Code,Months,Element Code,Element,Unit,Y1961,Y1962,Y1963,...,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019
0,2,Afghanistan,7001,January,7271,Temperature change,°C,0.777,0.062,2.744,...,3.601,1.179,-0.583,1.233,1.755,1.943,3.416,1.201,1.996,2.951
1,2,Afghanistan,7001,January,6078,Standard Deviation,°C,1.95,1.95,1.95,...,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95
2,2,Afghanistan,7002,February,7271,Temperature change,°C,-1.743,2.465,3.919,...,1.212,0.321,-3.201,1.494,-3.187,2.699,2.251,-0.323,2.705,0.086
3,2,Afghanistan,7002,February,6078,Standard Deviation,°C,2.597,2.597,2.597,...,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597
4,2,Afghanistan,7003,March,7271,Temperature change,°C,0.516,1.336,0.403,...,3.39,0.748,-0.527,2.246,-0.076,-0.497,2.296,0.834,4.418,0.234


## Feature Selection

In [4]:
#get summary of data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9656 entries, 0 to 9655
Data columns (total 66 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Area Code     9656 non-null   int64  
 1   Area          9656 non-null   object 
 2   Months Code   9656 non-null   int64  
 3   Months        9656 non-null   object 
 4   Element Code  9656 non-null   int64  
 5   Element       9656 non-null   object 
 6   Unit          9656 non-null   object 
 7   Y1961         8287 non-null   float64
 8   Y1962         8322 non-null   float64
 9   Y1963         8294 non-null   float64
 10  Y1964         8252 non-null   float64
 11  Y1965         8281 non-null   float64
 12  Y1966         8364 non-null   float64
 13  Y1967         8347 non-null   float64
 14  Y1968         8345 non-null   float64
 15  Y1969         8326 non-null   float64
 16  Y1970         8308 non-null   float64
 17  Y1971         8303 non-null   float64
 18  Y1972         8323 non-null 

In [5]:
#getting values where element is Temperature change
data=data[data["Element"]=="Temperature change"]

In [6]:
#filling null values
data = data.fillna(0)

In [7]:
data.columns

Index(['Area Code', 'Area', 'Months Code', 'Months', 'Element Code', 'Element',
       'Unit', 'Y1961', 'Y1962', 'Y1963', 'Y1964', 'Y1965', 'Y1966', 'Y1967',
       'Y1968', 'Y1969', 'Y1970', 'Y1971', 'Y1972', 'Y1973', 'Y1974', 'Y1975',
       'Y1976', 'Y1977', 'Y1978', 'Y1979', 'Y1980', 'Y1981', 'Y1982', 'Y1983',
       'Y1984', 'Y1985', 'Y1986', 'Y1987', 'Y1988', 'Y1989', 'Y1990', 'Y1991',
       'Y1992', 'Y1993', 'Y1994', 'Y1995', 'Y1996', 'Y1997', 'Y1998', 'Y1999',
       'Y2000', 'Y2001', 'Y2002', 'Y2003', 'Y2004', 'Y2005', 'Y2006', 'Y2007',
       'Y2008', 'Y2009', 'Y2010', 'Y2011', 'Y2012', 'Y2013', 'Y2014', 'Y2015',
       'Y2016', 'Y2017', 'Y2018', 'Y2019'],
      dtype='object')

In [8]:
#Turn years columns into rows using melt function
data=data.melt(id_vars=['Area Code', 'Area', 'Months Code', 'Months', 'Element Code', 'Element',
       'Unit'],var_name='Year', value_name='Temp Change')
data.head()

Unnamed: 0,Area Code,Area,Months Code,Months,Element Code,Element,Unit,Year,Temp Change
0,2,Afghanistan,7001,January,7271,Temperature change,°C,Y1961,0.777
1,2,Afghanistan,7002,February,7271,Temperature change,°C,Y1961,-1.743
2,2,Afghanistan,7003,March,7271,Temperature change,°C,Y1961,0.516
3,2,Afghanistan,7004,April,7271,Temperature change,°C,Y1961,-1.709
4,2,Afghanistan,7005,May,7271,Temperature change,°C,Y1961,1.412


In [9]:
data['Unit'].value_counts()

°C    284852
Name: Unit, dtype: int64

In [10]:
# dropping 'Months Code','Element Code', 'Unit' columns
data.drop(['Months Code','Element Code', 'Unit'], axis=1, inplace=True)

In [11]:
data.head()

Unnamed: 0,Area Code,Area,Months,Element,Year,Temp Change
0,2,Afghanistan,January,Temperature change,Y1961,0.777
1,2,Afghanistan,February,Temperature change,Y1961,-1.743
2,2,Afghanistan,March,Temperature change,Y1961,0.516
3,2,Afghanistan,April,Temperature change,Y1961,-1.709
4,2,Afghanistan,May,Temperature change,Y1961,1.412


In [12]:
#years without the Y letter
data['Year'] = data['Year'].str[1:].astype('str')
data

Unnamed: 0,Area Code,Area,Months,Element,Year,Temp Change
0,2,Afghanistan,January,Temperature change,1961,0.777
1,2,Afghanistan,February,Temperature change,1961,-1.743
2,2,Afghanistan,March,Temperature change,1961,0.516
3,2,Afghanistan,April,Temperature change,1961,-1.709
4,2,Afghanistan,May,Temperature change,1961,1.412
...,...,...,...,...,...,...
284847,5873,OECD,DecJanFeb,Temperature change,2019,1.527
284848,5873,OECD,MarAprMay,Temperature change,2019,1.352
284849,5873,OECD,JunJulAug,Temperature change,2019,1.078
284850,5873,OECD,SepOctNov,Temperature change,2019,1.233


In [13]:
#Get the unique values of columns Months
data["Months"].unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December',
       'Dec\x96Jan\x96Feb', 'Mar\x96Apr\x96May', 'Jun\x96Jul\x96Aug',
       'Sep\x96Oct\x96Nov', 'Meteorological year'], dtype=object)

In [14]:
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December']

In [15]:
#Get values where months is in : 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'
data = data[data['Months'].isin(months)]

In [16]:
data['Months'].unique()

array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
       'August', 'September', 'October', 'November', 'December'],
      dtype=object)

In [17]:
data.head()

Unnamed: 0,Area Code,Area,Months,Element,Year,Temp Change
0,2,Afghanistan,January,Temperature change,1961,0.777
1,2,Afghanistan,February,Temperature change,1961,-1.743
2,2,Afghanistan,March,Temperature change,1961,0.516
3,2,Afghanistan,April,Temperature change,1961,-1.709
4,2,Afghanistan,May,Temperature change,1961,1.412


In [18]:
# grouping data
data = data.groupby(['Area', 'Year']).mean().reset_index()

In [19]:
data.head()

Unnamed: 0,Area,Year,Area Code,Temp Change
0,Afghanistan,1961,2.0,0.07275
1,Afghanistan,1962,2.0,-0.229833
2,Afghanistan,1963,2.0,0.891583
3,Afghanistan,1964,2.0,-0.98
4,Afghanistan,1965,2.0,0.01


In [20]:
data

Unnamed: 0,Area,Year,Area Code,Temp Change
0,Afghanistan,1961,2.0,0.072750
1,Afghanistan,1962,2.0,-0.229833
2,Afghanistan,1963,2.0,0.891583
3,Afghanistan,1964,2.0,-0.980000
4,Afghanistan,1965,2.0,0.010000
...,...,...,...,...
16751,Zimbabwe,2015,181.0,1.258333
16752,Zimbabwe,2016,181.0,1.164583
16753,Zimbabwe,2017,181.0,0.224833
16754,Zimbabwe,2018,181.0,0.547167


In [21]:
#  getting top 10 and bottom 10 countries 
top_10 = data.groupby('Area').sum().sort_values('Temp Change', ascending=False)[:10].reset_index()['Area']
bottom_10 = data.groupby('Area').sum().sort_values('Temp Change', ascending=True)[:10].reset_index()['Area']

In [22]:
top_10

0                          Mongolia
1    Svalbard and Jan Mayen Islands
2                            Gambia
3                           Morocco
4                     Guinea-Bissau
5                           Tunisia
6                        Mauritania
7                    Eastern Europe
8                           Austria
9                           Senegal
Name: Area, dtype: object

In [23]:
bottom_10

0                              Nauru
1                   Pitcairn Islands
2                      Midway Island
3        Falkland Islands (Malvinas)
4                       Yugoslav SFR
5    Pacific Islands Trust Territory
6                   Marshall Islands
7                          Singapore
8                     Czechoslovakia
9                               Niue
Name: Area, dtype: object

In [24]:
countries = top_10.append(bottom_10)

In [25]:
data=data[data['Area'].isin(countries)]

In [26]:
#creating dataframe storing  countries with highest temprature
data1=data[data['Area'].isin(top_10)].reset_index()

In [27]:
data1

Unnamed: 0,index,Area,Year,Area Code,Temp Change
0,1062,Austria,1961,11.0,0.843750
1,1063,Austria,1962,11.0,-0.864833
2,1064,Austria,1963,11.0,-0.699083
3,1065,Austria,1964,11.0,-0.104167
4,1066,Austria,1965,11.0,-0.719500
...,...,...,...,...,...
585,15099,Tunisia,2015,222.0,1.349083
586,15100,Tunisia,2016,222.0,2.125917
587,15101,Tunisia,2017,222.0,1.341583
588,15102,Tunisia,2018,222.0,1.793500


In [28]:
data1 = data1.groupby(['Area']).max().reset_index()

In [29]:
data1['Area'].unique()

array(['Austria', 'Eastern Europe', 'Gambia', 'Guinea-Bissau',
       'Mauritania', 'Mongolia', 'Morocco', 'Senegal',
       'Svalbard and Jan Mayen Islands', 'Tunisia'], dtype=object)

In [30]:
data1

Unnamed: 0,Area,index,Year,Area Code,Temp Change
0,Austria,1120,2019,11.0,2.571917
1,Eastern Europe,4601,2019,5401.0,2.424333
2,Gambia,5840,2019,75.0,2.027583
3,Guinea-Bissau,6489,2019,175.0,2.094083
4,Mauritania,9026,2019,136.0,1.886833
5,Mongolia,9616,2019,141.0,2.735583
6,Morocco,9793,2019,143.0,2.2995
7,Senegal,13038,2019,195.0,1.71175
8,Svalbard and Jan Mayen Islands,14454,2019,260.0,5.454083
9,Tunisia,15103,2019,222.0,2.125917


In [31]:
year=data[data["Year"]=="1961"].sort_values('Area Code').reset_index()
year2=data[data["Year"]=="2019"].sort_values('Area Code').reset_index()

In [32]:
year

Unnamed: 0,index,Area,Year,Area Code,Temp Change
0,1062,Austria,1961,11.0,0.84375
1,3953,Czechoslovakia,1961,51.0,0.76825
2,5251,Falkland Islands (Malvinas),1961,65.0,-0.254917
3,5782,Gambia,1961,75.0,-0.155
4,8850,Marshall Islands,1961,127.0,0.0
5,8968,Mauritania,1961,136.0,0.15775
6,9440,Midway Island,1961,139.0,0.21675
7,9558,Mongolia,1961,141.0,0.267083
8,9735,Morocco,1961,143.0,0.874333
9,9971,Nauru,1961,148.0,-0.34925


In [33]:
year2

Unnamed: 0,index,Area,Year,Area Code,Temp Change
0,1120,Austria,2019,11.0,2.463333
1,4011,Czechoslovakia,2019,51.0,0.0
2,5309,Falkland Islands (Malvinas),2019,65.0,0.0
3,5840,Gambia,2019,75.0,1.640583
4,8908,Marshall Islands,2019,127.0,0.419167
5,9026,Mauritania,2019,136.0,1.435667
6,9498,Midway Island,2019,139.0,0.0
7,9616,Mongolia,2019,141.0,2.15575
8,9793,Morocco,2019,143.0,1.7325
9,10029,Nauru,2019,148.0,0.0


In [34]:
overall=year[['Area','Temp Change']]

In [35]:
overall['Temp Change2']=year2['Temp Change']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [36]:
overall['drop']=overall['Temp Change']-overall["Temp Change2"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [37]:
#dataframe for overall drop in temprature
overall

Unnamed: 0,Area,Temp Change,Temp Change2,drop
0,Austria,0.84375,2.463333,-1.619583
1,Czechoslovakia,0.76825,0.0,0.76825
2,Falkland Islands (Malvinas),-0.254917,0.0,-0.254917
3,Gambia,-0.155,1.640583,-1.795583
4,Marshall Islands,0.0,0.419167,-0.419167
5,Mauritania,0.15775,1.435667,-1.277917
6,Midway Island,0.21675,0.0,0.21675
7,Mongolia,0.267083,2.15575,-1.888667
8,Morocco,0.874333,1.7325,-0.858167
9,Nauru,-0.34925,0.0,-0.34925


In [38]:
overall1=overall[overall['drop']>0]

In [39]:
overall1

Unnamed: 0,Area,Temp Change,Temp Change2,drop
1,Czechoslovakia,0.76825,0.0,0.76825
6,Midway Island,0.21675,0.0,0.21675
10,Niue,0.183583,0.0,0.183583
12,Pitcairn Islands,0.614417,0.0,0.614417
15,Singapore,0.025083,0.0,0.025083
17,Yugoslav SFR,0.59475,0.0,0.59475


In [40]:
rise1=data[data["Year"]=="1961"].sort_values('Area Code').reset_index()
rise2=data[data["Year"]=="2000"].sort_values("Area Code").reset_index()
rise3=data[data["Year"]=="2019"].sort_values('Area Code').reset_index()

In [41]:
c1=rise1[['Area','Temp Change']]
c1["Temp Change2"]=rise2['Temp Change']
c1['Temp Change3']=rise3['Temp Change']
c1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Area,Temp Change,Temp Change2,Temp Change3
0,Austria,0.84375,1.960833,2.463333
1,Czechoslovakia,0.76825,0.0,0.0
2,Falkland Islands (Malvinas),-0.254917,0.0,0.0
3,Gambia,-0.155,0.924167,1.640583
4,Marshall Islands,0.0,-0.263583,0.419167
5,Mauritania,0.15775,1.197667,1.435667
6,Midway Island,0.21675,0.0,0.0
7,Mongolia,0.267083,0.847417,2.15575
8,Morocco,0.874333,1.2215,1.7325
9,Nauru,-0.34925,0.0,0.0


In [42]:
c1['drop1']=c1['Temp Change2']-c1['Temp Change']

In [43]:
c1

Unnamed: 0,Area,Temp Change,Temp Change2,Temp Change3,drop1
0,Austria,0.84375,1.960833,2.463333,1.117083
1,Czechoslovakia,0.76825,0.0,0.0,-0.76825
2,Falkland Islands (Malvinas),-0.254917,0.0,0.0,0.254917
3,Gambia,-0.155,0.924167,1.640583,1.079167
4,Marshall Islands,0.0,-0.263583,0.419167,-0.263583
5,Mauritania,0.15775,1.197667,1.435667,1.039917
6,Midway Island,0.21675,0.0,0.0,-0.21675
7,Mongolia,0.267083,0.847417,2.15575,0.580333
8,Morocco,0.874333,1.2215,1.7325,0.347167
9,Nauru,-0.34925,0.0,0.0,0.34925


In [44]:
c2=c1[c1['drop1']<0.5]

In [45]:
c2=c2[c2['drop1']>-0.5]

In [46]:
c2['rise']=c2['Temp Change3']-c2['Temp Change2']

In [47]:
c2

Unnamed: 0,Area,Temp Change,Temp Change2,Temp Change3,drop1,rise
2,Falkland Islands (Malvinas),-0.254917,0.0,0.0,0.254917,0.0
4,Marshall Islands,0.0,-0.263583,0.419167,-0.263583,0.68275
6,Midway Island,0.21675,0.0,0.0,-0.21675,0.0
8,Morocco,0.874333,1.2215,1.7325,0.347167,0.511
9,Nauru,-0.34925,0.0,0.0,0.34925,0.0
10,Niue,0.183583,0.0,0.0,-0.183583,0.0
11,Pacific Islands Trust Territory,-0.003583,0.0,0.0,0.003583,0.0
12,Pitcairn Islands,0.614417,0.51775,0.0,-0.096667,-0.51775
15,Singapore,0.025083,0.0,0.0,-0.025083,0.0
19,Eastern Europe,0.712417,1.009417,2.40175,0.297,1.392333


In [48]:
c3=c2[c2['rise']>0.6]

In [49]:
c3

Unnamed: 0,Area,Temp Change,Temp Change2,Temp Change3,drop1,rise
4,Marshall Islands,0.0,-0.263583,0.419167,-0.263583,0.68275
19,Eastern Europe,0.712417,1.009417,2.40175,0.297,1.392333


## Data Visualization

In [50]:
fig1=px.bar(data,x='Temp Change',y='Area',animation_frame='Year', hover_name='Temp Change',
      range_x=[-3.5,5.5], color='Area')
fig1.show()

# Conclusion fig. 1
The above graph showng 20 countries temperature change from 1961 to 2019.
the graph shows that as compared to 1961 some countries experienced an abnormal rise in the temprature  in 2019 whereas som countries experienced a drop in temprature change.**bold text**

In [51]:
fig2=px.bar(data1,x='Area',y='Temp Change',title="TOP 10 COUNTRIES HAVE HIGNEST TEMPERATURE")
fig2.show()

# Conclusion fig. 2
The above graph represents top 10 countries having highest temperature over deades.
it can be seen that Svalbard and Jan Mayen Islands experienced the highest temperature i.e. 5.454083.

In [52]:
fig3=px.bar(overall1,x='Area',y='drop',title="countries experiencing overall drop over years")
fig3.show()

# Conclusion fig. 3
the above figure represent the countries thet have experienced an overall drop in temperature over years.
Czechoslovakia has experience the highest drop i.e 0.76825 as compared to its temperature in 1961.
Singapre has experience a very less drop in temprature i.e. 0.02508333 as compared to its temperature in 1961.
the second most country that experienced the the overall drop is Pitcairn Islands i.e. 0.6144167. Countries like Yugoslav SFR, Midway Island, Niue have also experienced overall drop in temperature as compared to their temperature in 1961

In [53]:
fig4 = px.bar(c2, x="Area", y="drop1", color='Area', title='Countries maintaing temperature till 2000')
fig4.show()

# Conclusion fig. 4
The above figure reprensts the countries that maintain their tempersture till 2000.
These countries have maintained their temperature from 1961 within range  -0.5 to 0.5 which can be considered as no major change in their temperature

In [54]:
fig5 = px.bar(c3, x="Area", y="rise", color='Area', title='countries experiencing abnormal rise in temperature after 2000 after maintainimg temperature till 2000 ')
fig5.show()

# Conclusion fig.5
the above figure reprents the countries which shows abnomal rise in temperature or rise in temperatur at alarming rate after maintaing their temperature till 2000.
Eastern Europe shows a rise in temperature i.e 1.392333 after maintaing its temperature till 2000 ,in 2000 the temperature of Eastern Europe was 1.009417 amd in 2019 its temperature rise to 2.401750.
Similarly Marshall Island shows a rise  in temperature i.e. 0.68275 after maintaing its temperature till 2000, in 2000 the temperature of Marshall Island was -0.263583 amd in 2019 its temperature rise to 0.419167. 