In [1]:

import gc
import os
import warnings
from pathlib import Path

import dash
import dash_core_components as dcc
import dash_html_components as html
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.core.display import HTML
from plotly.subplots import make_subplots

In [None]:

# Notebook-wide settings: silence library warnings and keep DataFrame reprs compact.
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 300)
pd.set_option("display.max_rows", 20)

# Folder holding the input CSV files. It defaults to the original author's
# location but can be redirected with the AIWD_DATA_DIR environment variable,
# so the notebook is not tied to one machine's absolute path.
DATA_DIR = Path(os.environ.get('AIWD_DATA_DIR', 'C:/Users/Michal/Documents/AIWDProjekt'))

# Raw readings; later cells read the columns Region, Country, State, City,
# Year, Month, Day and AvgTemperature from this frame.
df = pd.read_csv(DATA_DIR / 'city_temperature.csv')
print(df.head())

In [None]:

def _print_value_ranges(frame, trailing_blank_line=False):
    """Print the min/max of the date-part columns and of AvgTemperature.

    Args:
        frame: DataFrame with 'Year', 'Month', 'Day' and 'AvgTemperature' columns.
        trailing_blank_line: when True, emit one empty line after the summary.
    """
    for column in ('Year', 'Month', 'Day', 'AvgTemperature'):
        print(f"{column:<15}: min: {frame[column].min()}, max {frame[column].max()}")
    if trailing_blank_line:
        print()


def clean_city_temperatures(raw):
    """Return a cleaned copy of the raw readings; `raw` itself is not modified.

    Steps, in order:
      1. drop the 'State' column (if present),
      2. drop rows whose 'Day' is 0,
      3. drop rows whose 'Year' is 200, 201 or 2020,
      4. drop exact duplicate rows,
      5. drop rows whose 'AvgTemperature' equals -99,
      6. add 'days_in_year' and keep only (Country, Year) groups with more
         than 270 rows,
      7. add a 'Date' column built from Year/Month/Day,
      8. convert 'AvgTemperature' with (value - 32) * 5/9.

    The original index labels are kept on purpose: later cells look rows up
    by the labels returned from idxmin/idxmax.

    Args:
        raw: DataFrame as loaded from city_temperature.csv.

    Returns:
        A new DataFrame with the extra columns 'days_in_year' and 'Date'.
    """
    frame = raw.drop(columns='State', errors='ignore')
    frame = frame[frame['Day'] != 0]
    # NOTE(review): 200/201 look like malformed years and 2020 like an
    # incomplete one -- confirm against the data source.
    frame = frame[~frame['Year'].isin([200, 201, 2020])]
    frame = frame.drop_duplicates()
    # NOTE(review): -99 is presumably the missing-value sentinel -- confirm.
    frame = frame[frame['AvgTemperature'] != -99]
    # NOTE(review): this counts ROWS per (Country, Year), so a country with
    # several cities can pass the threshold even when each city has few days.
    # Kept as-is because downstream results depend on it.
    frame = frame.assign(
        days_in_year=frame.groupby(['Country', 'Year'])['Day'].transform('size')
    )
    frame = frame[frame['days_in_year'] > 270].copy()
    frame['Date'] = pd.to_datetime(frame[['Year', 'Month', 'Day']])
    frame['AvgTemperature'] = (frame['AvgTemperature'] - 32) * (5 / 9)
    return frame


# Value ranges before cleaning.
_print_value_ranges(df, trailing_blank_line=True)

# 'Date' only exists after cleaning, so this guard keeps the cell idempotent:
# re-running it must not apply the temperature conversion a second time.
if 'Date' not in df.columns:
    df = clean_city_temperatures(df)

# Value ranges after cleaning.
_print_value_ranges(df)
print(f"Final data set shape: {df.shape}")

In [None]:
# Per-year statistics over all readings: the mean, the extreme values and the
# index labels of the rows where the extremes occur. The labels are then used
# to pull the place and date of each extreme from `df`.
_extreme_place_cols = ['Region', 'Country', 'City', 'Date']
_yearly_stats = (
    df.groupby('Year')['AvgTemperature']
    .agg(['mean', 'min', 'idxmin', 'max', 'idxmax'])
    .reset_index()
)
_with_coldest = _yearly_stats.merge(
    df[_extreme_place_cols], left_on='idxmin', right_index=True
)
dfg = _with_coldest.merge(
    df[_extreme_place_cols], left_on='idxmax', right_index=True, suffixes=('_min', '_max')
)

# Cities ordered from the highest to the lowest overall mean temperature.
dft = (
    df.groupby(['Country', 'City'])['AvgTemperature']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
print(dft.head())

# Layout: one wide yearly-mean panel on top, two bar panels underneath.
fig = make_subplots(
    rows=2,
    cols=2,
    column_widths=[0.5, 0.5],
    row_heights=[0.5, 0.5],
    vertical_spacing=0.15,
    specs=[
        [{"type": "scatter", "colspan": 2}, None],
        [{"type": "bar"}, {"type": "bar"}],
    ],
    subplot_titles=[
        'Średnie wartości temperatury na świecie (1995-2019)',
        'Top 5 najgorętszych miast',
        'Top 5 najzimniejszych miast',
    ],
    y_title='Średnia temperatura °C',
)


def _yearly_extremes_hover(row):
    """Build the hover text describing the coldest and hottest reading of one `dfg` row."""
    return ''.join([
        'Min temp : <b>', str(row['min']), '</b>, country : ', row['Country_min'],
        ', city : ', row['City_min'], ', date : ', str(row['Date_min'])[:10],
        '<br>',
        'Max temp : <b>', str(row['max']), '</b>, country : ', row['Country_max'],
        ', city : ', row['City_max'], ', date :', str(row['Date_max'])[:10],
    ])


def _city_bar_trace(cities, colour_scale):
    """Return the single bar trace px builds for `cities`, coloured by temperature."""
    bar_fig = px.bar(
        cities,
        x='City',
        y='AvgTemperature',
        color='AvgTemperature',
        color_continuous_scale=colour_scale,
        hover_data=['Country', 'AvgTemperature'],
        opacity=0.8,
    )
    return bar_fig.data[0]


# Yearly means as markers with an OLS trend line, plus a connecting line;
# every trace gets the same hover template and per-year hover text.
_trend_fig = px.scatter(dfg, x='Year', y='mean', trendline='ols', trendline_color_override='red')
_trend_fig.add_trace(px.line(dfg, x='Year', y='mean').data[0])
_trend_fig.update_traces(
    hovertemplate='<b>%{x}</b><br><i>Avg temp :<b> %{y}</b></i><br>%{text}',
    text=[_yearly_extremes_hover(row) for _, row in dfg.iterrows()],
    hoverlabel_bgcolor='white',
)
trace = _trend_fig.data
for _position in range(3):
    fig.add_trace(trace[_position], row=1, col=1)

# Five hottest cities (head of the descending ranking).
fig.add_trace(_city_bar_trace(dft.head(5), ['darkorange', 'red']), row=2, col=1)

# Five coldest cities (tail of the descending ranking).
fig.add_trace(_city_bar_trace(dft.tail(5), ['blue', 'lightblue']), row=2, col=2)

# Both bar traces share one colour axis, so a single scale covers cold to hot.
# The call is the cell's last expression, so the returned figure is displayed.
fig.update_layout(
    height=600,
    margin=dict(r=10, t=40, b=50, l=60),
    coloraxis_autocolorscale=False,
    coloraxis_colorscale=['blue', 'lightblue', 'yellow', 'orange', 'darkorange', 'red'],
    coloraxis_colorbar_title='Temp °C',
)


In [None]:
# Country -> ISO code lookup used to place countries on the map.
# The folder defaults to the original author's location and can be redirected
# with the AIWD_DATA_DIR environment variable instead of editing the path.
ISO_CODES_CSV = Path(os.environ.get('AIWD_DATA_DIR', 'C:/Users/Michal/Documents/AIWDProjekt')) / 'iso_codes.csv'

# Keep one row per distinct (Country, ISO_Code) pair.
iso_code = (
    pd.read_csv(ISO_CODES_CSV)[['Country', 'ISO_Code']]
    .drop_duplicates()
    .reset_index(drop=True)
)
iso_code.head()

In [None]:
# World temperature: yearly country means shown on an animated map.
_country_year_mean = (
    df.groupby(['Year', 'Country'])['AvgTemperature']
    .mean()
    .reset_index()
)

# The merge below is an inner join, so countries with no ISO code vanish from
# `dfc` (and from the rankings derived from it). Report them rather than
# dropping them silently.
_without_iso = _country_year_mean.loc[
    ~_country_year_mean['Country'].isin(iso_code['Country']), 'Country'
].unique()
if len(_without_iso):
    print("Countries without an ISO code (excluded): " + ', '.join(map(str, _without_iso)))

dfc = (
    _country_year_mean
    .merge(iso_code, on='Country')
    .sort_values(by=['Year', 'Country'])
)

# Rank countries within each year; ties share the lowest rank (method="min").
dfc['Rank_hottest'] = dfc.groupby(by=['Year'])['AvgTemperature'].rank(method="min", ascending=False)
dfc['Rank_coldest'] = dfc.groupby(by=['Year'])['AvgTemperature'].rank(method="min", ascending=True)

fig = (
    px.choropleth(
        dfc,
        locations='ISO_Code',
        color='AvgTemperature',
        hover_name='Country',
        hover_data={'ISO_Code': False, 'Year': True, 'AvgTemperature': ':.2f'},
        animation_frame='Year',
        color_continuous_scale='Portland',
        height=600,
    )
    .update_layout(
        title_text='Średnia temperatura na świecie',
        title_x=0.3,
        margin=dict(r=10, t=40, b=10, l=10),
        coloraxis_colorbar_title='Temp °C',
    )
)
# Set the per-frame duration of the first button px generates (milliseconds).
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 800
fig.show()



In [None]:
# Hottest / coldest countries over the years: two one-row tables with the
# yearly extreme readings on top, two top-5 bar charts below, animated by year.
step_duration = 800
_Y_RANGE = [-4, 33]  # fixed y-range so the bars stay comparable between frames

# Animation years, ascending. The initial traces show years[0], so the first
# frame, the tables and the bars all start from the same year.
years = list(dfc['Year'].sort_values().unique())


def _extreme_cells(stats, kind):
    """Return the three table columns (value, date, place) for one extreme.

    Args:
        stats: rows of `dfg` for a single year.
        kind: 'min' or 'max' -- selects the matching `dfg` columns.
    """
    return [
        '<b>' + stats[kind].map(u"{:,.2f}".format) + '</b>',
        stats[f'Date_{kind}'].map(u"{:%Y-%m-%d}".format),
        stats[f'City_{kind}'] + ', ' + stats[f'Country_{kind}'],
    ]


def _extreme_table(stats, kind, title):
    """Return a borderless go.Table showing one extreme reading under a bold `title`."""
    return go.Table(
        header=dict(
            values=['<b>' + title + '</b>', '', ''],
            align="left",
            line_color='white',
            fill_color='white',
        ),
        cells=dict(
            values=_extreme_cells(stats, kind),
            align="left",
            line_color='white',
            fill_color='white',
        ),
    )


def _top5(rank_column, year=None):
    """Return the `dfc` rows ranked <= 5 by `rank_column`, optionally for one year only."""
    top = dfc[dfc[rank_column] <= 5]
    if year is not None:
        top = top[top['Year'] == year]
    return top.sort_values(['Year', rank_column])


def _initial_bar_trace(rank_column):
    """Return the first-year bar trace built by px for the top-5 countries.

    Only `.data[0]` is reused, so nothing is set on the temporary figure's
    layout; titles and axis ranges are configured on the combined figure.
    """
    return (
        px.bar(
            data_frame=_top5(rank_column),
            x='Country',
            y='AvgTemperature',
            color='AvgTemperature',
            text='AvgTemperature',
            hover_data={'Year': False},
            animation_frame='Year',
            opacity=0.8,
        )
        .update_traces(texttemplate='%{text:.2f}')
        .data[0]
    )


fig = make_subplots(
    rows=2,
    cols=2,
    shared_xaxes=False,
    shared_yaxes=False,
    column_widths=[0.5, 0.5],
    row_heights=[0.2, 0.8],
    horizontal_spacing=0.05,
    vertical_spacing=0.1,
    specs=[
        [{"type": "table"}, {"type": "table"}],
        [{"type": "bar"}, {"type": "bar"}],
    ],
    subplot_titles=[None, None, 'Najzimniejsze państwa', 'Najgorętsze państwa'],
    y_title='Średnia temperatura °C',
)

# Initial state. The trace order (0..3) must match `traces=[0, 1, 2, 3]` in the frames.
dfg_t = dfg[dfg['Year'] == years[0]]
fig.add_trace(_extreme_table(dfg_t, 'min', 'Najniższa temperatura'), row=1, col=1)
fig.add_trace(_extreme_table(dfg_t, 'max', 'Najwyższa temperatura'), row=1, col=2)
fig.add_trace(_initial_bar_trace('Rank_coldest'), row=2, col=1)
fig.add_trace(_initial_bar_trace('Rank_hottest'), row=2, col=2)

# One frame per year, updating the four traces above in the same order.
frames = []
for year in years:
    dfg_t = dfg[dfg['Year'] == year]
    dfc_c = _top5('Rank_coldest', year)
    dfc_h = _top5('Rank_hottest', year)
    frames.append(go.Frame(
        name=str(year),
        data=[
            go.Table(cells=dict(values=_extreme_cells(dfg_t, 'min'))),
            go.Table(cells=dict(values=_extreme_cells(dfg_t, 'max'))),
            go.Bar(x=dfc_c['Country'], y=dfc_c['AvgTemperature'], text=dfc_c['AvgTemperature']),
            go.Bar(x=dfc_h['Country'], y=dfc_h['AvgTemperature'], text=dfc_h['AvgTemperature']),
        ],
        traces=[0, 1, 2, 3],
    ))
fig.frames = frames

# Play walks through the frames after the first one (the first year is the
# figure's initial state); Pause stops the animation immediately.
buttons = [
    dict(
        label='Play',
        method='animate',
        args=[
            [f'{year}' for year in years[1:]],
            dict(
                frame=dict(duration=step_duration, easing='linear', redraw=True),
                fromcurrent=True,
                transition=dict(duration=0, easing='linear'),
            ),
        ],
    ),
    dict(
        label='Pause',
        method='animate',
        args=[
            [None],
            dict(
                frame=dict(duration=0, redraw=False),
                mode='immediate',
                transition=dict(duration=0),
            ),
        ],
    ),
]

updatemenus = [dict(
    type='buttons',
    direction='left',
    y=0,
    x=-0.1,
    xanchor='left',
    yanchor='top',
    pad=dict(b=10, t=45),
    buttons=buttons,
)]

# Slider steps address frames by name; frames are named str(year), so the
# step target is passed as the same string.
sliders = [dict(
    yanchor='top',
    xanchor='left',
    currentvalue=dict(prefix='Year: ', visible=True, xanchor='left'),
    transition=dict(duration=0, easing='linear'),
    pad=dict(b=10, t=25),
    len=0.9, x=0.1, y=0,
    steps=[
        dict(
            args=[
                [str(year)],
                dict(
                    frame=dict(duration=step_duration, easing='linear', redraw=True),
                    transition=dict(duration=0, easing='linear'),
                ),
            ],
            label=str(year),
            method='animate',
        )
        for year in years
    ],
)]

fig.update_layout(updatemenus=updatemenus, sliders=sliders)
fig.update_layout(height=600, margin=dict(r=10, t=30, b=50, l=10))
fig.update_layout(
    coloraxis_autocolorscale=False,
    coloraxis_colorscale=['blue', 'lightblue', 'yellow', 'orange', 'darkorange', 'red'],
    coloraxis_colorbar_title='Temp °C',
)
# Both bar panels live in row 2; the call is the cell's last expression, so
# the returned figure is displayed.
fig.update_yaxes(range=_Y_RANGE, autorange=False, row=2)

In [None]:
# Per-(year, region) statistics: the mean, the extreme values and the index
# labels of the extreme rows, which are used to look up where and when each
# extreme occurred.
_region_stats = (
    df.groupby(['Year', 'Region'])['AvgTemperature']
    .agg(['mean', 'min', 'idxmin', 'max', 'idxmax'])
    .reset_index()
)
_place_cols = df[['Country', 'City', 'Date']]
dfr = (
    _region_stats
    .merge(_place_cols, left_on='idxmin', right_index=True)
    .merge(_place_cols, left_on='idxmax', right_index=True, suffixes=('_min', '_max'))
)

fig = make_subplots(
    rows=1,
    cols=1,
    column_widths=[1],
    horizontal_spacing=0.05,
    shared_yaxes=True,
    specs=[[{"type": "scatter"}]],
    y_title='Średnia temperatura °C',
)


def _region_extremes_hover(row):
    """Build the hover text describing the coldest and hottest reading of one `dfr` row."""
    return ''.join([
        'Min temp : <b>', str(row['min']), '</b>, country : ', row['Country_min'],
        ', city : ', row['City_min'], ', date : ', str(row['Date_min'])[:10],
        '<br>',
        'Max temp : <b>', str(row['max']), '</b>, country : ', row['Country_max'],
        ', city : ', row['City_max'], ', date :', str(row['Date_max'])[:10],
    ])


def _attach_region_hover(region_trace):
    """Give one regional line its hover template and per-year hover text."""
    region_rows = dfr[dfr['Region'] == region_trace.name]
    region_trace.update(
        hovertemplate='<b>%{x}</b><br><i>Avg temp :<b> %{y}</b></i><br>%{text}',
        text=[_region_extremes_hover(row) for _, row in region_rows.iterrows()],
        hoverlabel_bgcolor='white',
    )


# Mean temperature per region over time: one line per region.
_region_fig = px.line(dfr, x='Year', y='mean', color='Region', line_dash='Region')
# This axis title is set on the temporary figure; only its traces are copied below.
_region_fig.update_yaxes(title_text='Average temperature °C')
_region_fig.for_each_trace(_attach_region_hover)
traces = _region_fig.data

for trace in traces:
    fig.add_trace(trace, row=1, col=1)

# The call is the cell's last expression, so the returned figure is displayed.
fig.update_layout(
    height=450,
    margin=dict(r=10, t=60, b=50, l=10),
    title_text="Średnia temperatura w regionach świata",
    title_x=0.5,
)


In [None]:
# Seasonal temperature per region across the years.

# Lookup tables: month number -> month name, season number -> season name,
# and month number -> season number (December, January, February form season 1).
month_dict = dict(enumerate(
    ["January", "February", "March", "April", "May", "June",
     "July", "August", "September", "October", "November", "December"],
    start=1,
))
season_dict = dict(enumerate(["Winter", "Spring", "Summer", "Autumn"], start=1))
season_month_map = {month: (month % 12) // 3 + 1 for month in range(1, 13)}

# Monthly mean temperature per country, tagged with season and month names.
dfmc = (
    df.groupby(['Year', 'Month', 'Region', 'Country'])['AvgTemperature']
    .mean()
    .reset_index()
    .rename(columns={'Month': 'Month_num'})
    .sort_values(by=['Year', 'Month_num', 'Region', 'Country'])
)
dfmc = dfmc.assign(
    Season_num=lambda frame: frame['Month_num'].map(season_month_map),
    Season=lambda frame: frame['Season_num'].map(season_dict),
    Month=lambda frame: frame['Month_num'].map(month_dict),
)

# Seasonal mean per region: the average of the monthly country means above.
dfsr = (
    dfmc.groupby(['Year', 'Season_num', 'Season', 'Region'])['AvgTemperature']
    .mean()
    .reset_index()
    .sort_values(by=['Year', 'Season_num', 'Region'])
)

# Grid of small line charts: one row per season, one column per region.
_season_grid = px.line(
    dfsr,
    x='Year',
    y='AvgTemperature',
    color='Region',
    facet_row='Season',
    facet_col='Region',
    facet_row_spacing=0.03,
    hover_name='Region',
    hover_data={'Region': False, 'Season': True, 'AvgTemperature': ':.2f'},
    height=450,
    width=800,
)
_season_grid.update_traces(showlegend=False)
_season_grid.update_layout(
    title_text='Średnia temperatura w sezonach °C',
    title_x=0.25,
    margin=dict(r=40, t=60, b=50, l=0),
)
_season_grid.update_yaxes(title_text=None)
# Facet labels come as "column=value"; keep only the value and draw it horizontally.
_season_grid.for_each_annotation(
    lambda label: label.update(text=label.text.split("=")[-1], textangle=0)
)
_season_grid.show()

In [None]:
# Mean temperature for a country picked from a dropdown menu, with a per-season breakdown.
dfyc = dfmc.groupby(['Country', 'Year'])['AvgTemperature'].mean().reset_index()
dfycs = dfmc.groupby(['Country', 'Year', 'Season_num', 'Season'])['AvgTemperature'].mean().reset_index()

# Dropdown order; the first entry is also the country drawn initially, so the
# active menu item and the displayed data agree.
countries = dfyc['Country'].sort_values().unique()
initial_country = countries[0]

# Season name -> season number, and the names in season-number order. These
# let every seasonal trace be placed and restyled by the season it shows
# instead of by the order in which px happened to create it.
season_to_num = dict(zip(dfycs['Season'], dfycs['Season_num']))
season_names = sorted(season_to_num, key=season_to_num.get)
season_colors = dict(zip(season_names, ['blue', 'green', 'red', 'orange']))

fig = make_subplots(
    rows=2,
    cols=4,
    row_heights=[0.5, 0.5],
    vertical_spacing=0.1,
    horizontal_spacing=0.02,
    shared_yaxes=True,
    specs=[
        [{"type": "scatter", "colspan": 4}, None, None, None],
        [{"type": "scatter"}, {"type": "scatter"}, {"type": "scatter"}, {"type": "scatter"}],
    ],
    subplot_titles=['Średnia temperatura w państwie', 'Zima', 'Wiosna', 'Lato', 'Jesień'],
)

# Top panel: yearly mean of the initial country (trace 0).
fig.add_trace(
    px.line(
        data_frame=dfyc[dfyc['Country'] == initial_country],
        x='Year',
        y='AvgTemperature',
    ).data[0],
    row=1, col=1,
)

# Bottom row: one line per season. Order and colours are pinned so the result
# does not depend on which season appears first in the initial country's data.
traces_seasonal = (
    px.line(
        data_frame=dfycs[dfycs['Country'] == initial_country],
        x='Year',
        y='AvgTemperature',
        facet_col='Season',
        color='Season',
        category_orders={'Season': season_names},
        color_discrete_map=season_colors,
    )
    .update_traces(showlegend=False)
).data

# Column i of row 2 is titled for season number i, so each trace goes into
# the column of its own season. `seasonal_slots[k]` is the season number
# shown by figure trace k + 1.
seasonal_slots = []
for trace in traces_seasonal:
    season_num = int(season_to_num[trace.name])
    fig.add_trace(trace, row=2, col=season_num)
    seasonal_slots.append(season_num)

# One dropdown entry per country; selecting it swaps x/y of every trace.
buttons = []
for country in countries:
    dfyc_c = dfyc[dfyc['Country'] == country]
    args_x = [dfyc_c['Year']]
    args_y = [dfyc_c['AvgTemperature']]
    args_f = [0]

    for trace_index, season_num in enumerate(seasonal_slots, start=1):
        dfycs_c = dfycs[(dfycs['Country'] == country) & (dfycs['Season_num'] == season_num)]
        args_x.append(dfycs_c['Year'])
        args_y.append(dfycs_c['AvgTemperature'])
        args_f.append(trace_index)

    buttons.append(dict(
        method='restyle',
        label=country,
        visible=True,
        args=[{'x': args_x, 'y': args_y}, args_f],
    ))

# Dropdown menu.
updatemenu = [dict(
    buttons=buttons,
    direction='down',
    pad={'r': 10, 't': 10},
    showactive=True,
    x=-0.05,
    xanchor='left',
    y=1.1,
    yanchor='top',
)]

# Style the yearly-mean line in the top panel.
fig.data[0].line.dash = 'dash'
fig.data[0].mode = 'markers+lines'
fig.data[0].line.color = '#00CC96'

# Subplot titles: the main title is slightly larger than the season titles.
fig.update_layout(font_size=10)
fig.for_each_annotation(lambda a: a.update(font=dict(size=14)))
fig.layout.annotations[0].font.size = 16

# Same fixed year range on every panel.
fig.update_xaxes(range=[1995, 2019], autorange=False)

fig.update_layout(
    updatemenus=updatemenu,
    height=600,
    margin=dict(r=10, t=20, b=30, l=0),
)

# Release the intermediate frames; the figure keeps its own copy of the data.
del dfyc_c, dfycs_c, dfyc, dfycs
gc.collect()

fig.show()