In [8]:
import zipfile

import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

with zipfile.ZipFile("archive.zip") as z:
    with z.open("GlobalLandTemperaturesByMajorCity.csv") as f:
        df = pd.read_csv(f, parse_dates=["dt"])

df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


In [9]:
import numpy as np

df['NumLatitude'] = pd.to_numeric(df['Latitude'].str[:-1]) * np.where(df['Latitude'].str[-1] == 'N', 1, -1)
df['NumLongitude'] = pd.to_numeric(df['Longitude'].str[:-1]) * np.where(df['Longitude'].str[-1] == 'E', 1, -1)
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,NumLatitude,NumLongitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W,5.63,-3.23
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W,5.63,-3.23
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W,5.63,-3.23
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W,5.63,-3.23
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W,5.63,-3.23


In [10]:
df = df.loc[(df['dt'] >= '1970-01-01') & (df['dt'] <= '1980-01-01')]
df = df[['dt', 'AverageTemperature', 'City', 'NumLatitude', 'NumLongitude']]
df.head()

Unnamed: 0,dt,AverageTemperature,City,NumLatitude,NumLongitude
1452,1970-01-01,27.183,Abidjan,5.63,-3.23
1453,1970-02-01,28.436,Abidjan,5.63,-3.23
1454,1970-03-01,28.4,Abidjan,5.63,-3.23
1455,1970-04-01,27.9,Abidjan,5.63,-3.23
1456,1970-05-01,26.901,Abidjan,5.63,-3.23


In [11]:
df = df.loc[df['dt'].dt.month == 1].groupby(['City']).agg(AverageTemperature=('AverageTemperature', 'mean'), Latitude=('NumLatitude', 'mean'), Longitude=('NumLongitude', 'mean'))
df.head()

Unnamed: 0_level_0,AverageTemperature,Latitude,Longitude
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abidjan,26.985273,5.63,-3.23
Addis Abeba,17.065818,8.84,38.11
Ahmadabad,19.517,23.31,72.52
Aleppo,5.422727,36.17,37.79
Alexandria,14.274364,31.35,30.16


In [12]:
df['City'] = df.index
df = df.merge(right=df, how='cross')
df

Unnamed: 0,AverageTemperature_x,Latitude_x,Longitude_x,City_x,AverageTemperature_y,Latitude_y,Longitude_y,City_y
0,26.985273,5.63,-3.23,Abidjan,26.985273,5.63,-3.23,Abidjan
1,26.985273,5.63,-3.23,Abidjan,17.065818,8.84,38.11,Addis Abeba
2,26.985273,5.63,-3.23,Abidjan,19.517000,23.31,72.52,Ahmadabad
3,26.985273,5.63,-3.23,Abidjan,5.422727,36.17,37.79,Aleppo
4,26.985273,5.63,-3.23,Abidjan,14.274364,31.35,30.16,Alexandria
...,...,...,...,...,...,...,...,...
9995,-2.064364,34.56,108.97,Xian,1.545273,36.17,139.23,Tokyo
9996,-2.064364,34.56,108.97,Xian,-8.626636,44.20,-80.50,Toronto
9997,-2.064364,34.56,108.97,Xian,22.346727,15.27,32.50,Umm Durman
9998,-2.064364,34.56,108.97,Xian,4.128091,29.74,114.46,Wuhan


In [15]:
df['Distance'] = np.sqrt((df['Latitude_x'] - df['Latitude_y']) ** 2 + (df['Longitude_x'] - df['Longitude_y']) ** 2)
df

Unnamed: 0,AverageTemperature_x,Latitude_x,Longitude_x,City_x,AverageTemperature_y,Latitude_y,Longitude_y,City_y,Distance
0,26.985273,5.63,-3.23,Abidjan,26.985273,5.63,-3.23,Abidjan,0.000000
1,26.985273,5.63,-3.23,Abidjan,17.065818,8.84,38.11,Addis Abeba,41.464439
2,26.985273,5.63,-3.23,Abidjan,19.517000,23.31,72.52,Ahmadabad,77.785891
3,26.985273,5.63,-3.23,Abidjan,5.422727,36.17,37.79,Aleppo,51.140317
4,26.985273,5.63,-3.23,Abidjan,14.274364,31.35,30.16,Alexandria,42.147485
...,...,...,...,...,...,...,...,...,...
9995,-2.064364,34.56,108.97,Xian,1.545273,36.17,139.23,Tokyo,30.302800
9996,-2.064364,34.56,108.97,Xian,-8.626636,44.20,-80.50,Toronto,189.715077
9997,-2.064364,34.56,108.97,Xian,22.346727,15.27,32.50,Umm Durman,78.865487
9998,-2.064364,34.56,108.97,Xian,4.128091,29.74,114.46,Wuhan,7.305648


In [18]:
df['Temp'] = np.abs(df['AverageTemperature_x'] - df['AverageTemperature_y'])
df

Unnamed: 0,AverageTemperature_x,Latitude_x,Longitude_x,City_x,AverageTemperature_y,Latitude_y,Longitude_y,City_y,Distance,Temp
0,26.985273,5.63,-3.23,Abidjan,26.985273,5.63,-3.23,Abidjan,0.000000,0.000000
1,26.985273,5.63,-3.23,Abidjan,17.065818,8.84,38.11,Addis Abeba,41.464439,9.919455
2,26.985273,5.63,-3.23,Abidjan,19.517000,23.31,72.52,Ahmadabad,77.785891,7.468273
3,26.985273,5.63,-3.23,Abidjan,5.422727,36.17,37.79,Aleppo,51.140317,21.562545
4,26.985273,5.63,-3.23,Abidjan,14.274364,31.35,30.16,Alexandria,42.147485,12.710909
...,...,...,...,...,...,...,...,...,...,...
9995,-2.064364,34.56,108.97,Xian,1.545273,36.17,139.23,Tokyo,30.302800,3.609636
9996,-2.064364,34.56,108.97,Xian,-8.626636,44.20,-80.50,Toronto,189.715077,6.562273
9997,-2.064364,34.56,108.97,Xian,22.346727,15.27,32.50,Umm Durman,78.865487,24.411091
9998,-2.064364,34.56,108.97,Xian,4.128091,29.74,114.46,Wuhan,7.305648,6.192455


In [24]:
df.nlargest(columns='Temp', n=1)

Unnamed: 0,AverageTemperature_x,Latitude_x,Longitude_x,City_x,AverageTemperature_y,Latitude_y,Longitude_y,City_y,Distance,Temp
2534,27.458364,-7.23,39.73,Dar Es Salaam,-18.778455,45.81,125.77,Harbin,101.07484,46.236818


In [27]:
df.loc[df['Distance'] != 0].nsmallest(columns='Distance', n=1)

Unnamed: 0,AverageTemperature_x,Latitude_x,Longitude_x,City_x,AverageTemperature_y,Latitude_y,Longitude_y,City_y,Distance,Temp
1116,20.418,4.02,-74.73,Bogotá,22.261,4.02,-76.34,Cali,1.61,1.843
