In [2]:
import pandas as pd  
import os


In [3]:
# Path to the input CSV, resolved relative to the notebook's working directory.
csv_path = os.path.join(
    '..',
    'Web Scrapping',
    'raw_gci_data.csv',
)

# Load the raw table (Year, Rank, Country, Crime Index) into a DataFrame.
gci_df = pd.read_csv(filepath_or_buffer=csv_path)

In [4]:
# Preview the first 50 rows of the freshly loaded table.
gci_df.head(n=50)

Unnamed: 0,Year,Rank,Country,Crime Index
0,2012,1,Venezuela,84.7
1,2012,2,South Africa,78.1
2,2012,3,Puerto Rico,73.1
3,2012,4,Malaysia,70.9
4,2012,5,United States,64.9
5,2012,6,Algeria,64.8
6,2012,7,Mexico,62.5
7,2012,8,Peru,60.9
8,2012,9,Lebanon,59.9
9,2012,10,Bangladesh,59.7


In [5]:
# Drop the Rank column. Reassigning (rather than inplace=True) together with
# errors='ignore' keeps this cell idempotent: re-running it after Rank is
# already gone no longer raises KeyError.
gci_df = gci_df.drop(columns='Rank', errors='ignore')

In [6]:
# Per-column count of missing values (isnull is an alias of isna).
gci_df.isnull().sum()

Year           0
Country        0
Crime Index    0
dtype: int64

In [8]:
# Sanity-check year coverage: if the inclusive span (max - min + 1) equals the
# number of distinct years, no year between the first and last one is missing.
years = gci_df['Year']
first_year, last_year = years.min(), years.max()

print(len(years.unique()))         # number of distinct years
print(last_year)                   # latest year
print(first_year)                  # earliest year
print(last_year - first_year + 1)  # inclusive span; matches the distinct count above

12
2023
2012
12


In [9]:
# Derive the year span from the data instead of hard-coding it, so that years
# added to the CSV later are not silently dropped by the reindex below.
# Both end points are inclusive.
all_years = range(int(gci_df['Year'].min()), int(gci_df['Year'].max()) + 1)

# Reshape from long format (one row per country-year) to wide format
# (one row per country, one column per year).
pivot_df = (
    gci_df.pivot_table(
        index='Country',
        columns='Year',
        values='Crime Index',
        aggfunc='first'  # Keep the first value if a country-year pair is duplicated
    )
    .reindex(columns=all_years)  # Every year in the span gets a column (NaN where absent)
    .sort_index(axis=0)  # Sort countries alphabetically
    .reset_index()       # Move Country from index to column
)

# Remove the leftover 'Year' label from the column axis
pivot_df.columns.name = None

In [10]:
# Preview the first 50 countries of the wide (country x year) table.
pivot_df.head(n=50)

Unnamed: 0,Country,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Afghanistan,,,82.5,77.3,,73.9,,76.6,76.2,76.4,77.0,78.6
1,Albania,30.1,47.0,51.3,49.6,48.2,44.3,39.9,40.3,39.5,41.6,46.1,45.7
2,Algeria,64.8,51.8,52.3,51.3,57.6,49.6,50.7,48.3,49.8,51.9,53.8,52.6
3,Angola,,52.3,63.0,68.0,,65.5,,,65.0,66.6,66.4,65.4
4,Argentina,48.4,59.3,57.5,62.4,63.3,62.6,61.8,62.6,61.8,63.3,64.1,64.3
5,Armenia,,41.1,35.0,29.1,31.1,27.1,28.3,22.0,21.6,23.3,22.1,22.0
6,Australia,40.6,44.4,41.2,42.2,43.1,42.4,42.5,42.8,41.4,42.4,43.8,45.5
7,Austria,44.1,30.4,25.8,26.2,24.1,19.2,20.4,21.4,23.7,25.2,26.1,27.3
8,Azerbaijan,38.5,36.7,34.2,35.5,32.4,27.1,30.5,31.7,31.6,31.7,32.3,31.8
9,Bahamas,,,,72.9,,63.2,,,62.9,62.3,62.5,62.4


In [11]:
def _fill_with_row_mean(row):
    """Replace the missing entries of one row with the mean of its non-missing values."""
    return row.fillna(row.mean())


# Impute per country: with Country as the index, each row holds only the yearly
# values, so the row mean is that country's average over the years it has data for.
pivot_df = (
    pivot_df
    .set_index("Country")
    .apply(_fill_with_row_mean, axis=1)
    .reset_index()
)

In [12]:
# Write the imputed wide table to disk without the positional index column.
pivot_df.to_csv(path_or_buf='global_crime_index.csv', index=False)