In [5]:
import os
import pandas as pd


In [7]:
data_path = os.path.join("..", 'data', 'Data_popularity_google.csv')
df = pd.read_csv(data_path)
print(df.head())

  municipality  year  treat  newcomers     rents  popu  year_2013  year_2016  \
0       adachi  2008      0    28533.0  281500.0   484        NaN        NaN   
1       adachi  2009      0    31389.0  275200.0   528        NaN        NaN   
2       adachi  2010      0    27831.0  263500.0   459        NaN        NaN   
3       adachi  2011      0    27978.0  262100.0   447        NaN        NaN   
4       adachi  2012      0    27850.0  262800.0   423        NaN        NaN   

   year_2018  
0        NaN  
1        NaN  
2        NaN  
3        NaN  
4        NaN  


In [9]:
# List 1 - with dataset
list_with_dataset = [col for col in df.columns if df[col].isnull().any()]
print("Columns with missing values:", list_with_dataset)

Columns with missing values: ['newcomers', 'rents', 'year_2013', 'year_2016', 'year_2018']


In [11]:
# Create a list of basic statistics for "newcomers" and "rents" variables from a dataset
newcomers_stats = [df['newcomers'].mean(), df['newcomers'].median()]
rents_stats = [df['rents'].mean(), df['rents'].median()]
print("Newcomers stats (mean, median):", newcomers_stats)
print("Rents stats (mean, median):", rents_stats)
print(type(newcomers_stats)) #Show that this is a list
print(type(rents_stats)) #Show that this is a list

Newcomers stats (mean, median): [26574.4, 20807.0]
Rents stats (mean, median): [477326.2626262626, 369400.0]
<class 'list'>
<class 'list'>


In [13]:
# Create a list containing first three years from the dataset
first_three_years = df['year'][:3].tolist()
print("First three years:", first_three_years)
print(type(first_three_years)) #Show that this is a list

# Create a list containing even years in the year column from the dataset
even_years = list(set([year for year in df['year'] if year % 2 == 0]))
print("Even years:", even_years)
print(type(even_years)) #Show that this is a list

First three years: [2008, 2009, 2010]
<class 'list'>
Even years: [2016, 2018, 2020, 2008, 2010, 2012, 2014]
<class 'list'>


In [15]:
# Another way to show even years using list
start = 2008
stop = 2022
step = 2
unique_years = sorted(set(df['year']))
year_range = range(start, stop, step)
filtered_years = [year for year in unique_years if year in year_range]
print("Filtered years (no duplicates, stepping by 2):", filtered_years)
print(type(filtered_years)) #Show that this is a list

Filtered years (no duplicates, stepping by 2): [2008, 2010, 2012, 2014, 2016, 2018, 2020]
<class 'list'>


In [17]:
# Remove elements from a list
filtered_years.remove(2014) # remove a particular item
print("Filtered years (2014 removed):", filtered_years)

Filtered years (2014 removed): [2008, 2010, 2012, 2016, 2018, 2020]


In [19]:
# Use join method 
print(''.join(str(year) for year in filtered_years))      
print(' '.join(str(year) for year in filtered_years))    
print(', '.join(str(year) for year in filtered_years))

200820102012201620182020
2008 2010 2012 2016 2018 2020
2008, 2010, 2012, 2016, 2018, 2020


In [21]:
# Dictionary
year_stats = {
    "min_year": min(filtered_years),
    "max_year": max(filtered_years),
    "count": len(filtered_years)
}
print("Year statistics:", year_stats)
print(type(year_stats)) #Show that this is a dictionary

Year statistics: {'min_year': 2008, 'max_year': 2020, 'count': 6}
<class 'dict'>


In [23]:
# Create a nested dictionary based on municipality and year
municipality_data = {}

# Populate the dictionary
for index, row in df.iterrows():
    municipality = row['municipality']
    year = row['year']
    
    if municipality not in municipality_data:
        municipality_data[municipality] = {}
    
    municipality_data[municipality][year] = {
        "Treatment": row['treat'],
        "Newcomers to the municipality": row['newcomers'],
        "Rent price": row['rents'],
        "Populality": row['popu']
    }

# Display the dictionary (only for first 5 municipalities)
municipality_count = 0
for municipality, years in municipality_data.items():
    if municipality_count >= 5:
        break 
    print(f"Data for {municipality}:")
    for year, info in years.items():
        print(f"  Year {year}: {info}")
    municipality_count += 1 

print(type(municipality_data)) #Show that this is a dictionary

Data for adachi:
  Year 2008: {'Treatment': 0, 'Newcomers to the municipality': 28533.0, 'Rent price': 281500.0, 'Populality': 484}
  Year 2009: {'Treatment': 0, 'Newcomers to the municipality': 31389.0, 'Rent price': 275200.0, 'Populality': 528}
  Year 2010: {'Treatment': 0, 'Newcomers to the municipality': 27831.0, 'Rent price': 263500.0, 'Populality': 459}
  Year 2011: {'Treatment': 0, 'Newcomers to the municipality': 27978.0, 'Rent price': 262100.0, 'Populality': 447}
  Year 2012: {'Treatment': 0, 'Newcomers to the municipality': 27850.0, 'Rent price': 262800.0, 'Populality': 423}
  Year 2013: {'Treatment': 0, 'Newcomers to the municipality': 27124.0, 'Rent price': 254800.0, 'Populality': 436}
  Year 2014: {'Treatment': 0, 'Newcomers to the municipality': 28002.0, 'Rent price': nan, 'Populality': 500}
  Year 2015: {'Treatment': 0, 'Newcomers to the municipality': 29980.0, 'Rent price': nan, 'Populality': 475}
  Year 2016: {'Treatment': 0, 'Newcomers to the municipality': 28195.0, '

In [27]:
# Dictionary 2
for municipality in municipality_data.keys():
    print(municipality)

adachi
arakawa
bunkyo
chiyoda
chohu
chuo
edogawa
funabashi
ichikawa
itabashi
katsushika
kawaguchi
kawasaki
kita
komae
koto
matsudo
meguro
minato
misato
mitaka
musashino
nakano
narashino
nerima
ota
setagaya
shibuya
shinagawa
shinjuku
soka
suginami
sumida
taito
toda
toshima
urayasu
wako
yashio
yokohama


In [130]:
# Perform operation in files in different folders - save this notebook into script folder
main_folder = 'coe_final_assignment'
target_folder = os.path.join("..", 'scripts_stata_python')
current_notebook_path = os.path.abspath("Question3_list_dictionary.ipynb")
target_notebook_path = os.path.join(target_folder, os.path.basename(current_notebook_path))
shutil.move(current_notebook_path, target_notebook_path)
print(f"Notebook moved to {target_notebook_path}")

Notebook moved to ../scripts_stata_python/Question3_list_dictionary.ipynb
