In [1]:
#This tells python to draw the graphs "inline" - in the notebook
%matplotlib inline  
import matplotlib.pyplot as plt
import statsmodels.api as sm
from math import sqrt
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from scipy.stats import sem
import statistics 
import seaborn as sns
from IPython.display import display, Math, Latex, display_latex
import plotly.express as px
import pylab
import pandas as pd
import numpy as np
# make the plots (graphs) a little wider by default
pylab.rcParams['figure.figsize'] = (10., 8.)
sns.set(font_scale=1.5)
sns.set_style("white")

# 1. Data Loading and Exploration

In [2]:
#read the local csv file as 
smartcity_index = pd.read_csv('Smart_City_index.csv')
smartcity_index.head(20)

Unnamed: 0,Id,City,Country,Smart_Mobility,Smart_Environment,Smart_Government,Smart_Economy,Smart_People,Smart_Living,SmartCity_Index
0,1,Oslo,Norway,6480,6512,7516,4565,8618,9090,7138
1,2,Bergen,Norway,7097,6876,7350,4905,8050,9090,7296
2,3,Amsterdam,Netherlands,7540,5558,8528,8095,7098,7280,7311
3,4,Copenhagen,Denmark,7490,7920,8726,5580,5780,7200,7171
4,5,Stockholm,Sweden,6122,7692,8354,4330,6743,7730,6812
5,6,Montreal,Canada,7490,4848,6624,6180,8465,9920,7353
6,7,Vienna,Austria,5683,7608,6232,5415,8580,7500,6771
7,8,Odense,Denmark,6160,8404,7578,5200,6955,7200,6886
8,9,Singapore,Singapore,5790,4344,5560,5535,9695,10000,6813
9,10,Boston,United States,7870,5224,6020,8935,6573,6220,6852


### **variable explanation**
**Smart_Mobility**: 
Index calculated from assessment of city-wide Public Transportation System, ICT, accessibility infrastructure

**Smart_Environment**: 
Index calculated from environmental sustainability impact, monitoring pollution and energy managemen.

**Smart_Government**: Index calculated from comparative study of transparent governance & open data initiatives of smart cities across the world. Also, citizen 
participation in decision making has been included.

**Smart_Economy**: Index calculated through global comparison of city-wide productivity, economic vitality, and support for entrepreneurship and innovation.

**Smart_People**: Index calculated by comparing social and cultural plurality, education systems and its supporting ancillary facilities across the world.

**Smart_Living**: Index calculated by measuring metric around healthcare services, social security and housing quality.

**SmartCity_Index**: Aggregate score for smart city model based on smart city supergroups.et.

In [3]:
smartcity_index.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Id                 102 non-null    int64 
 1   City               102 non-null    object
 2   Country            102 non-null    object
 3   Smart_Mobility     102 non-null    int64 
 4   Smart_Environment  102 non-null    int64 
 5   Smart_Government   102 non-null    int64 
 6   Smart_Economy      102 non-null    int64 
 7   Smart_People       102 non-null    int64 
 8   Smart_Living       102 non-null    int64 
 9   SmartCity_Index    102 non-null    int64 
dtypes: int64(8), object(2)
memory usage: 8.1+ KB


In [10]:
original_column_names = smartcity_index.columns
print(original_column_names)


Index(['Id', 'City', 'Country', 'Smart_Mobility ', 'Smart_Environment',
       'Smart_Government ', 'Smart_Economy ', 'Smart_People', 'Smart_Living',
       'SmartCity_Index'],
      dtype='object')


In [8]:
#rename columns
sci = smartcity_index.rename(columns={
    'Smart_Mobility': 'mobility',
    'Smart_Environment': 'environment',
    'Smart_Government': 'government',
    'Smart_Economy': 'economy',
    'Smart_People': 'people',
    'Smart_Living': 'living',
    'SmartCity_Index': 'index',
})
sci.head(20)

Unnamed: 0,Id,City,Country,Smart_Mobility,environment,Smart_Government,Smart_Economy,people,living,index
0,1,Oslo,Norway,6480,6512,7516,4565,8618,9090,7138
1,2,Bergen,Norway,7097,6876,7350,4905,8050,9090,7296
2,3,Amsterdam,Netherlands,7540,5558,8528,8095,7098,7280,7311
3,4,Copenhagen,Denmark,7490,7920,8726,5580,5780,7200,7171
4,5,Stockholm,Sweden,6122,7692,8354,4330,6743,7730,6812
5,6,Montreal,Canada,7490,4848,6624,6180,8465,9920,7353
6,7,Vienna,Austria,5683,7608,6232,5415,8580,7500,6771
7,8,Odense,Denmark,6160,8404,7578,5200,6955,7200,6886
8,9,Singapore,Singapore,5790,4344,5560,5535,9695,10000,6813
9,10,Boston,United States,7870,5224,6020,8935,6573,6220,6852


In [5]:
sci.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Id                 102 non-null    int64 
 1   City               102 non-null    object
 2   Country            102 non-null    object
 3   Smart_Mobility     102 non-null    int64 
 4   environment        102 non-null    int64 
 5   Smart_Government   102 non-null    int64 
 6   Smart_Economy      102 non-null    int64 
 7   people             102 non-null    int64 
 8   living             102 non-null    int64 
 9   index              102 non-null    int64 
dtypes: int64(8), object(2)
memory usage: 8.1+ KB


In [6]:
#Summary Statistics
summary = sci.describe().round(2)
summary=summary.T
summary

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Id,102.0,51.5,29.59,1.0,26.25,51.5,76.75,102.0
Smart_Mobility,102.0,5786.11,1202.63,3175.0,4864.75,5724.0,6763.75,8110.0
environment,102.0,5935.93,1722.71,1850.0,4530.5,6491.0,7310.0,8844.0
Smart_Government,102.0,5916.58,1175.27,2806.0,5143.0,5932.0,6585.0,8726.0
Smart_Economy,102.0,6114.28,1790.14,1490.0,5007.5,6391.5,7463.75,9225.0
people,102.0,5896.81,1475.25,2825.0,4724.75,5747.5,7072.5,9695.0
living,102.0,6354.86,2274.48,1980.0,4385.0,6334.0,8696.75,10000.0
index,102.0,5928.46,1038.96,-24.0,5344.75,6248.0,6672.5,7353.0


In [7]:
num_cities_by_country = sci.groupby('Country')['City'].count().sort_values(ascending=False)
num_cities_by_country


Country
Italy                   11
Germany                 10
Finland                  9
France                   7
United States            7
Canada                   6
Denmark                  4
Australia                4
Norway                   4
China                    3
United Kingdom           3
Sweden                   3
Russia                   2
Japan                    2
South Korea              2
Spain                    2
United Arab Emirates     2
Switzerland              2
Slovenia                 1
Taiwan                   1
Slovakia                 1
Singapore                1
Portugal                 1
3900                     1
New Zealand              1
Netherlands              1
Malaysia                 1
Luxembourg               1
Israel                   1
Ireland                  1
Iceland                  1
Hungary                  1
Estonia                  1
Czech Republic           1
Belgium                  1
Austria                  1
Latvia              