In [None]:
# Importing the libraries
import numpy as np # Linear Algebra
import matplotlib.pyplot as plt # data visualization
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
import plotly.tools as tls
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Load the dataset into `df` and display its column names.
# The path is relative, so the CSV must be in the kernel's working directory.
# Later cells read the dt, AverageTemperature, City, Country, Latitude and
# Longitude columns of this frame.
df = pd.read_csv("GlobalLandTemperaturesByMajorCity.csv")
df.columns

In [None]:


# Row counts per city and per country; the number of entries in each result
# is the number of distinct cities / countries in the dataset.
total_cities = df.pivot_table(index=['City'], aggfunc='size')
total_countries = df.pivot_table(index=['Country'], aggfunc='size')

city_count = len(total_cities)
country_count = len(total_countries)
print('In total, there are', city_count, 'cities in', country_count, 'countries')

In [None]:


# Date range covered by the dataset.
# The date column is selected by name (not by position) and parsed here, so
# the result is a Timestamp whether or not the conversion cell further down
# has already been executed; on a fresh kernel `dt` is still plain strings.
record_dates = pd.to_datetime(df["dt"])
earliest_date = record_dates.min()
latest_date = record_dates.max()
print('The earliest(starting) date on record is', earliest_date, 'and the latest(ending) date on record is', latest_date)

The earliest(starting) date on record is 1743-11-01 00:00:00 and the latest(ending) date on record is 2013-09-01 00:00:00


In [None]:
# Parse the date column into datetimes, then drop every row that has no
# AverageTemperature value (the drop modifies `df` in place).
df["dt"] = pd.to_datetime(df["dt"])
df.dropna(subset=["AverageTemperature"], inplace=True)

In [None]:
# Derive calendar columns from the datetime column `dt`.
# `dt` must already hold datetimes (see the conversion cell above); the
# vectorized `.dt` accessor replaces a Python-level lambda call per row.
df["Month"] = df["dt"].dt.strftime("%B")  # full month name, e.g. "January"
df["Year"] = df["dt"].dt.year
# City is compared against plain strings later on, so store it as str.
df["City"] = df["City"].astype('str')

In [None]:
# List every country in the dataset together with the cities recorded for it.
# `country` is a flat list of names; `city` is a parallel list whose k-th
# entry holds the unique cities of the k-th country.
country = df['Country'].unique().tolist()
print('Number of countries = ',len(country),'\n')

city=[]
for country_name in country:
    cities_here = df.loc[df['Country'] == country_name, 'City'].unique().tolist()
    city.append(cities_here)
    print('Number of cities = ',len(cities_here)," in ",country_name)

print('\n\nCountry = ',country,'\n\n')
print('City = ',city)

In [None]:


# Convert coordinate strings of the form "<number><hemisphere letter>" into
# signed decimal degrees: north / east are positive, anything else negative.
# The arrays have one entry per row of `df`, in row order.
# The builtin float is used because the `np.float` alias was removed from
# NumPy (it raises AttributeError on current releases).
lat_num = np.array([float(cl[:-1]) * (1 if cl[-1] == "N" else -1) for cl in df.Latitude])
lon_num = np.array([float(cl[:-1]) * (1 if cl[-1] == "E" else -1) for cl in df.Longitude])

def plotCitiesOnMap():
  """Show an interactive world map with one red marker per city location.

  Reads the notebook-level `df`, `lat_num` and `lon_num`; the two arrays must
  have one entry per row of `df`, in the same order. Hovering a marker shows
  the city name. Returns nothing; the figure is displayed via `fig.show()`.
  """
  # `df` repeats each city once per temperature record, so plotting every row
  # would stack a large number of identical markers on each location. Keep a
  # single row per distinct (city, latitude, longitude) instead.
  locations = pd.DataFrame({
      "City": df.City.to_numpy(),
      "lat": lat_num,
      "lon": lon_num,
  }).drop_duplicates()

  fig = go.Figure(go.Scattergeo(
      lat = locations["lat"],
      lon = locations["lon"],
      mode ='markers',
      # colorscale is only consulted when `color` is a numeric array; it is
      # kept so a per-country colouring can be switched on later.
      marker=dict(color='red', #country_integer
                  colorscale='jet'),
      text=locations["City"], #cities_unique
  ))

  fig.update_layout(autosize=True, hovermode='closest',
                    title='Locations of Cities in Dataset',
                    geo=dict(showcountries=True))
  fig.show()

In [None]:


# Render the map of city locations defined above.
plotCitiesOnMap()

In [None]:


def MeanAnnualTemperature(df):
    """Draw a line plot of the yearly mean temperature across all cities.

    The records in `df` are first averaged per (City, Year); those city-year
    means are then averaged per Year, so within a year every city contributes
    one value regardless of how many records it has.

    Args:
        df: DataFrame with City, Year and AverageTemperature columns.

    Returns:
        None. A new 15x7 matplotlib figure is created and drawn on.
    """
    plt.figure(figsize=(15, 7))
    city_year_means = df.groupby(["City", "Year"], as_index=False)["AverageTemperature"].mean()
    year_means = city_year_means.groupby(["Year"], as_index=False)["AverageTemperature"].mean()
    sns.lineplot(data=year_means, x="Year", y="AverageTemperature")

In [None]:

# Plot the yearly mean temperature over all cities in the cleaned dataset.
MeanAnnualTemperature(df)

In [None]:

def calculateAnnualMean(city, data=None):
  """Return the years on record for a city and its mean temperature per year.

  Args:
    city: value matched exactly against the City column.
    data: optional DataFrame with City, Year and AverageTemperature columns.
      When omitted, the notebook-level `df` is used, which keeps existing
      one-argument calls working.

  Returns:
    A tuple (years, annual_mean) of two NumPy arrays of equal length: the
    distinct years in ascending order and the mean AverageTemperature of
    the city's records in each of those years. Both are empty when the city
    has no rows.
  """
  if data is None:
    data = df

  city_rows = data.loc[data["City"] == city]
  # A single grouped aggregation replaces re-scanning a boolean mask once per
  # year; groupby sorts its keys, so the year order is ascending.
  annual_mean = city_rows.groupby("Year")["AverageTemperature"].mean()
  return annual_mean.index.to_numpy(), annual_mean.to_numpy()

In [None]:
################    (This function can be used to make a plot for any city)

def plotAnnualMeanTemp(city, window=10):
  """Plot a city's yearly mean temperature and a centred running mean.

  Args:
    city: city name, passed to calculateAnnualMean.
    window: number of years in the running-mean window. Defaults to 10, the
      value that used to be hard-coded.

  Returns:
    None. The plotly figure is displayed via `fig.show()`. If the city has
    no data, a message is printed and no figure is drawn.
  """
  x, y = calculateAnnualMean(city)
  if len(x) == 0:
    # Same message MonthCityMeanAnnualTemperature prints for an unknown city;
    # avoids showing an empty figure for a misspelled name.
    print("City Doesnt Exist")
    return

  # Centred rolling mean. Positions where the window is incomplete (the
  # first and last few years) are NaN and therefore not drawn.
  y_mean = pd.Series(y).rolling(window, center = True).mean().to_numpy()

  fig = go.Figure()
  fig.add_trace(go.Scatter(x=x, y=y, name = "Yearly Mean"))
  fig.add_trace(go.Scatter(x=x, y=y_mean, name = str(window) + " year running mean"))
  fig.update_layout(title = city, xaxis_title='Year', yaxis_title = 'Temperature (in Celsius)')
  fig.show()

In [None]:

# Yearly mean temperature and running mean for Toronto.
plotAnnualMeanTemp("Toronto")

In [None]:

# Yearly mean temperature and running mean for Shanghai.
plotAnnualMeanTemp("Shanghai")

In [None]:
################    (This function can be used to make a plot for any city, for any specific month over all years)

def MonthCityMeanAnnualTemperature(df,City,Month):
    """Plot, year by year, one city's mean temperature for a given month.

    Args:
        df: DataFrame with City, Month, Year and AverageTemperature columns.
        City: city name to select; must match a value in the City column.
        Month: month name to select, compared against the Month column.

    Returns:
        None. Prints a message and draws nothing when the city is absent;
        otherwise creates a 15x7 matplotlib figure with the line plot.
    """
    # Guard clause: bail out early for a city that is not in the data.
    if not (df.City == City).any():
        print("City Doesnt Exist")
        return

    plt.figure(figsize=(15, 7))
    selected = df.loc[(df.City == City) & (df.Month == Month)]
    yearly = selected.groupby(["Year"], as_index=False)["AverageTemperature"].mean()
    sns.lineplot(data=yearly, x="Year", y="AverageTemperature")

In [None]:
# Toronto's January mean temperature, one point per year.
MonthCityMeanAnnualTemperature(df,"Toronto","January")

In [None]:
# Toronto's July mean temperature, one point per year.
MonthCityMeanAnnualTemperature(df,"Toronto","July")

In [None]:
################    (This function can be used to make a plot for any month, over all years)

def MonthCityTempOverAllCities(df,Month):
    """Plot, year by year, the mean temperature of one month over all cities.

    For every year, the AverageTemperature of all records in the requested
    month is averaged with the cities pooled together (no per-city step).

    Args:
        df: DataFrame with Year, Month and AverageTemperature columns.
        Month: month name, compared against the Month column.

    Returns:
        None. Creates a 15x7 matplotlib figure with the line plot.
    """
    # Filter to the requested month first so only those rows are grouped.
    # The previous version also computed hottest/coldest-year lookups and a
    # per-city table that were never used; they have been removed.
    month_rows = df.loc[df.Month == Month]
    yearly = month_rows.groupby(["Year"], as_index=False)["AverageTemperature"].mean()

    plt.figure(figsize=(15, 7))
    ax = sns.lineplot(
                x="Year",
                y="AverageTemperature",
                data=yearly)
    # Name the month in the title so figures for different months can be told apart.
    ax.set_title("Mean " + str(Month) + " temperature over all cities")
    

In [None]:
# January mean temperature over all cities, one point per year.
MonthCityTempOverAllCities(df,"January")

In [None]:
# July mean temperature over all cities, one point per year.
MonthCityTempOverAllCities(df,"July")