In [352]:
import pandas as pd
import numpy as np
import os
import glob

In [353]:
def read_excel_sheets(file_path, sheet_names):
    """
    Read specific sheets from an Excel file and return them as a list of dataframes
    
    Parameters
    ---------------
    file_path : str
        The path to the Excel file
        
    sheet_names : list of str
        The names of the sheets to read from the Excel file
        
    Returns
    ---------------
    list of pandas.DataFrame
    """
    
    # Create an empty list to store the dataframes
    dfs = []
    
    # Loop through each sheet and read it into a dataframe
    for sheet_name in sheet_names:
        try:
            if sheet_name == "3":
                df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=0)
            else:
                df = pd.read_excel(file_path, sheet_name=sheet_name, skiprows=1)
            dfs.append(df)
        except Exception as e:
            print(f"Error reading sheet {sheet_name} from file {file_path}: {e}")

    # Return the list of dataframes
    return dfs

# Example usage
file_path = r"C:/Users/chimi/Desktop/Python Data Science Projects/Italy Crime/Data/Crime Rate data/Calls_to_1522.data/Calls_to_1522.xlsx"
sheet_names = ["Table 1", "Table 2", "Table 3", "Table 4", "Table 5", "Table 6", "Table 7", "Table 8", "Table 9", "Table 10", "Table 11", "Table 12", "Table 13", "Table 14"]

# Call the function to read the sheets into a list of dataframes
dfs = read_excel_sheets(file_path, sheet_names)

# Assign each dataframe to a separate variable
df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14 = dfs


In [354]:
df1

Unnamed: 0.1,Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
0,Valid calls (users),,,,,,,,
1,victim of violence seeking for help,7064.0,4672.0,3487.0,3620.0,3288.0,4567.0,4329.0,8608.0
2,legal information,722.0,896.0,397.0,421.0,374.0,628.0,254.0,394.0
3,victim of stalking seeking for help,1463.0,873.0,685.0,672.0,569.0,610.0,592.0,934.0
4,useful phone numbers for out of target calls,4618.0,2425.0,1439.0,1025.0,1022.0,1883.0,1074.0,3493.0
5,reporting of violence,2083.0,1281.0,1030.0,1095.0,975.0,947.0,782.0,1969.0
6,information about national shelters for victim...,5068.0,5027.0,4396.0,3416.0,2806.0,2808.0,2016.0,3340.0
7,information about the helpline 1522,1476.0,1417.0,1263.0,1294.0,1445.0,3099.0,4108.0,4061.0
8,information for professionals on the procedure...,80.0,27.0,19.0,20.0,24.0,19.0,27.0,47.0
9,information on legal responsibility of the pub...,25.0,8.0,12.0,23.0,20.0,14.0,28.0,18.0


In [355]:
# Drop NA values rows
df1 = df1.drop([0,19])

In [356]:
df1.isna().sum()

Unnamed: 0    0
2013          0
2014          0
2015          0
2016          0
2017          0
2018          0
2019          0
2020          0
dtype: int64

In [357]:
# Check for duplicates
df1.duplicated().sum()

0

In [358]:
df1 = df1.melt(id_vars='Unnamed: 0',value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year',value_name='Total' )

In [359]:
df1.rename(columns={'Unnamed: 0' :'Valid Call Reason'}, inplace=True)

In [360]:
df1.head(5)

Unnamed: 0,Valid Call Reason,Year,Total
0,victim of violence seeking for help,2013,7064.0
1,legal information,2013,722.0
2,victim of stalking seeking for help,2013,1463.0
3,useful phone numbers for out of target calls,2013,4618.0
4,reporting of violence,2013,2083.0


In [361]:
# Convert to csv
df1.to_csv('validusercalls.csv', index=False)

In [223]:
df2

Unnamed: 0.1,Unnamed: 0,2018,Unnamed: 2,Unnamed: 3,2019,Unnamed: 5,Unnamed: 6,2020,Unnamed: 8
0,,Type oc channel,,,Type oc channel,,,Type oc channel,
1,Valid calls (users),Phone calls,Chat,,Phone calls,Chat,,Phone calls,Chat
2,,,,,,,,,
3,victim of violence seeking for help,4411,156,,3975,354,,7139,1469
4,legal information,606,22,,235,19,,339,55
5,victim of stalking seeking for help,577,33,,525,67,,763,171
6,useful phone numbers for out of target calls,1869,14,,1037,37,,3178,315
7,reporting of violence,929,18,,744,38,,1798,171
8,information about national shelters for victim...,2767,41,,1957,59,,2987,353
9,information about the helpline 1522,3013,86,,3868,240,,3290,771


In [362]:
#Valid Call Reason type(phone and chat)
df2 = df2.drop([0,1,2,18,19])

In [363]:
 # Drop NAN columns
df2.drop(columns=['Unnamed: 3','Unnamed: 6'],inplace=True)

In [364]:
# Rename some of the column
df2.rename(columns={'Unnamed: 0':'Valid Reason Call', 
                     2018:'2018_PhoneCalls', 
                     'Unnamed: 2':'2018_Chat', 
                      2019:'2019_PhoneCalls', 
                     'Unnamed: 5':'2019_Chat', 
                     2020:'2020_PhoneCalls', 
                     'Unnamed: 8':'2020_Chat'}, inplace=True)


In [365]:
df2.isnull().sum()

Valid Reason Call    0
2018_PhoneCalls      0
2018_Chat            0
2019_PhoneCalls      0
2019_Chat            0
2020_PhoneCalls      0
2020_Chat            0
dtype: int64

In [366]:
# Check for duplicates
df2.duplicated().sum()

0

In [367]:
df2 = df2.melt(id_vars='Valid Reason Call',value_vars=['2018_PhoneCalls', '2018_Chat','2019_PhoneCalls','2019_Chat','2020_PhoneCalls','2020_Chat'],var_name='Year',value_name='Number of Calls/Chat' )

In [368]:
df2[['Year', 'Category']] = df2['Year'].str.split('_', expand=True)

In [369]:
df2

Unnamed: 0,Valid Reason Call,Year,Number of Calls/Chat,Category
0,victim of violence seeking for help,2018,4411,PhoneCalls
1,legal information,2018,606,PhoneCalls
2,victim of stalking seeking for help,2018,577,PhoneCalls
3,useful phone numbers for out of target calls,2018,1869,PhoneCalls
4,reporting of violence,2018,929,PhoneCalls
...,...,...,...,...
85,reporting of public services malfunctions,2020,3,Chat
86,international after hours calls,2020,4,Chat
87,victim of discrimination seeking for help,2020,0,Chat
88,reporting of media misinformation,2020,0,Chat


In [370]:
df2.to_csv('validcallsbychanneltype.csv', index=False)

In [371]:
# DF3
df3

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019,2020
0,Type of user,Reasons of calls,,,,,,,,
1,Users calling for its self,victim of violence seeking for help,7058.0,4670.0,3485.0,3617.0,3281.0,4565.0,4323.0,8588.0
2,Users calling for its self,legal information,719.0,891.0,394.0,420.0,373.0,626.0,254.0,392.0
3,Users calling for its self,victim of stalking seeking for help,1460.0,872.0,684.0,671.0,568.0,610.0,592.0,933.0
4,Users calling for its self,useful phone numbers for out of target calls,4596.0,2412.0,1425.0,1018.0,1017.0,1881.0,1074.0,3488.0
5,Users calling for its self,reporting of violence,679.0,231.0,127.0,274.0,327.0,344.0,209.0,847.0
6,Users calling for its self,information about national shelters for victim...,4451.0,4123.0,3747.0,2913.0,2370.0,2245.0,1862.0,3130.0
7,Users calling for its self,information about the helpline 1522,1411.0,1345.0,1204.0,1234.0,1361.0,2999.0,3891.0,3919.0
8,Users calling for its self,information for professionals on the procedure...,30.0,10.0,7.0,8.0,4.0,5.0,5.0,24.0
9,Users calling for its self,information on legal responsibility of the pub...,10.0,1.0,1.0,5.0,5.0,2.0,4.0,12.0


In [372]:
# Drop missing rows columns
df3 = df3.drop([0, 47])

In [373]:
df3.columns = ['User_Type', 'Valid Call Reason', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']

In [374]:
df3.columns

Index(['User_Type', 'Valid Call Reason', '2013', '2014', '2015', '2016',
       '2017', '2018', '2019', '2020'],
      dtype='object')

In [375]:
df3 = pd.melt(df3, id_vars=['User_Type','Valid Call Reason'], var_name='Year', value_name='Total Calls')

In [376]:
df3.head(5)

Unnamed: 0,User_Type,Valid Call Reason,Year,Total Calls
0,Users calling for its self,victim of violence seeking for help,2013,7058.0
1,Users calling for its self,legal information,2013,719.0
2,Users calling for its self,victim of stalking seeking for help,2013,1460.0
3,Users calling for its self,useful phone numbers for out of target calls,2013,4596.0
4,Users calling for its self,reporting of violence,2013,679.0


In [377]:
df3.to_csv('validcallbyusertype.csv',index=False)

In [378]:
# Df 4
df4

Unnamed: 0,Valid calls,2013,2014,2015,2016,2017,2018,2019,2020
0,0-2 AM,761.0,566.0,463.0,350.0,431.0,606.0,676.0,1189.0
1,3-5 AM,338.0,206.0,180.0,134.0,140.0,238.0,245.0,415.0
2,6-8 AM,1374.0,953.0,684.0,649.0,549.0,755.0,663.0,1164.0
3,9-11 AM,5580.0,4107.0,3080.0,2816.0,2451.0,3436.0,2872.0,4563.0
4,12-14 PM,4521.0,3408.0,2508.0,2380.0,2077.0,2925.0,2598.0,4752.0
5,15-17 PM,4723.0,3515.0,2688.0,2499.0,2220.0,3018.0,2724.0,4633.0
6,18-20 PM,3467.0,2559.0,2027.0,1791.0,1684.0,2289.0,2133.0,3831.0
7,21-23 PM,2223.0,1610.0,1268.0,1113.0,1151.0,1507.0,1513.0,2524.0
8,,,,,,,,,
9,Calls from victimes,,,,,,,,


In [379]:
df4 = df4.drop([8,9,18])

In [380]:
# Seperate valid columns 
valid_calls = df4.iloc[0:8,:]

In [381]:
# Categorize the calls
valid_calls['Call_Type'] = 'Valid_Calls'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_calls['Call_Type'] = 'Valid_Calls'


In [382]:
# Seperate valid columns
victimes_call = df4.iloc[10:18, :]

In [383]:
# Categorize the calls
victimes_call['Call_Type'] = 'Victimes'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  victimes_call['Call_Type'] = 'Victimes'


In [384]:
# Merge the dfs
df4 = pd.concat([valid_calls, victimes_call], axis=0)

In [385]:
df4 = df4.melt(id_vars=['Valid calls','Call_Type'],value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019,2020],var_name='Year', value_name='Number of Calls' )

In [386]:
df4.head(5)

Unnamed: 0,Valid calls,Call_Type,Year,Number of Calls
0,0-2 AM,Valid_Calls,2013,761.0
1,3-5 AM,Valid_Calls,2013,338.0
2,6-8 AM,Valid_Calls,2013,1374.0
3,9-11 AM,Valid_Calls,2013,5580.0
4,12-14 PM,Valid_Calls,2013,4521.0


In [387]:
# Rename Valid Calls
df4.rename(columns={'Valid calls':'Call_Time'},inplace=True)

In [388]:
df4.head(5)

Unnamed: 0,Call_Time,Call_Type,Year,Number of Calls
0,0-2 AM,Valid_Calls,2013,761.0
1,3-5 AM,Valid_Calls,2013,338.0
2,6-8 AM,Valid_Calls,2013,1374.0
3,9-11 AM,Valid_Calls,2013,5580.0
4,12-14 PM,Valid_Calls,2013,4521.0


In [389]:
df4.to_csv('validcallbyvictimestimeofday.csv',index=False)

In [390]:
#
df5

Unnamed: 0,Users,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total
1,2013,2283,4152,3606,3712,3292,3388,2554,22987
2,2014,1672,2956,2740,2650,2522,2460,1924,16924
3,2015,1350,2262,2104,1973,1869,1823,1517,12898
4,2016,1112,1999,1970,1857,1802,1646,1346,11732
5,2017,1161,1720,1682,1623,1577,1555,1385,10703
6,2018,1585,2561,2306,2196,2198,2118,1810,14774
7,2019,1428,2254,2087,2055,1908,2021,1671,13424
8,2020,2695,3808,3548,3563,3269,3237,2951,23071
9,,,,,,,,,


In [391]:
# Filter out of the N/A rows
df5 = df5.drop([9, 20])

In [392]:
users_df = df5.iloc[1:9, :]

In [393]:
users_df

Unnamed: 0,Users,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
1,2013,2283,4152,3606,3712,3292,3388,2554,22987
2,2014,1672,2956,2740,2650,2522,2460,1924,16924
3,2015,1350,2262,2104,1973,1869,1823,1517,12898
4,2016,1112,1999,1970,1857,1802,1646,1346,11732
5,2017,1161,1720,1682,1623,1577,1555,1385,10703
6,2018,1585,2561,2306,2196,2198,2118,1810,14774
7,2019,1428,2254,2087,2055,1908,2021,1671,13424
8,2020,2695,3808,3548,3563,3269,3237,2951,23071


In [394]:
# Select the rows for the victim table
victims_df = df5.iloc[12:20, :]

In [395]:
victims_df

Unnamed: 0,Users,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
13,2014,680,1227,1119,1121,1090,989,762,6988
14,2015,592,887,857,844,751,746,624,5301
15,2016,504,917,940,866,863,784,601,5475
16,2017,538,825,756,735,721,702,673,4950
17,2018,590,1090,1000,936,987,872,772,6247
18,2019,638,1017,906,903,848,837,678,5827
19,2020,1351,1899,1751,1842,1661,1638,1476,11618


In [396]:
# Rename the column
users_df.columns = ['Years', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Total']
victims_df.columns = ['Years', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Total']


In [397]:
users_df['User_Type'] = 'User'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users_df['User_Type'] = 'User'


In [398]:
users_df

Unnamed: 0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total,User_Type
1,2013,2283,4152,3606,3712,3292,3388,2554,22987,User
2,2014,1672,2956,2740,2650,2522,2460,1924,16924,User
3,2015,1350,2262,2104,1973,1869,1823,1517,12898,User
4,2016,1112,1999,1970,1857,1802,1646,1346,11732,User
5,2017,1161,1720,1682,1623,1577,1555,1385,10703,User
6,2018,1585,2561,2306,2196,2198,2118,1810,14774,User
7,2019,1428,2254,2087,2055,1908,2021,1671,13424,User
8,2020,2695,3808,3548,3563,3269,3237,2951,23071,User


In [399]:
victims_df['User_Type'] = 'Victimes'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  victims_df['User_Type'] = 'Victimes'


In [400]:
victims_df

Unnamed: 0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total,User_Type
13,2014,680,1227,1119,1121,1090,989,762,6988,Victimes
14,2015,592,887,857,844,751,746,624,5301,Victimes
15,2016,504,917,940,866,863,784,601,5475,Victimes
16,2017,538,825,756,735,721,702,673,4950,Victimes
17,2018,590,1090,1000,936,987,872,772,6247,Victimes
18,2019,638,1017,906,903,848,837,678,5827,Victimes
19,2020,1351,1899,1751,1842,1661,1638,1476,11618,Victimes


In [401]:
users_df = users_df.reset_index(drop=True)

In [402]:
users_df

Unnamed: 0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total,User_Type
0,2013,2283,4152,3606,3712,3292,3388,2554,22987,User
1,2014,1672,2956,2740,2650,2522,2460,1924,16924,User
2,2015,1350,2262,2104,1973,1869,1823,1517,12898,User
3,2016,1112,1999,1970,1857,1802,1646,1346,11732,User
4,2017,1161,1720,1682,1623,1577,1555,1385,10703,User
5,2018,1585,2561,2306,2196,2198,2118,1810,14774,User
6,2019,1428,2254,2087,2055,1908,2021,1671,13424,User
7,2020,2695,3808,3548,3563,3269,3237,2951,23071,User


In [403]:
users_df

Unnamed: 0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total,User_Type
0,2013,2283,4152,3606,3712,3292,3388,2554,22987,User
1,2014,1672,2956,2740,2650,2522,2460,1924,16924,User
2,2015,1350,2262,2104,1973,1869,1823,1517,12898,User
3,2016,1112,1999,1970,1857,1802,1646,1346,11732,User
4,2017,1161,1720,1682,1623,1577,1555,1385,10703,User
5,2018,1585,2561,2306,2196,2198,2118,1810,14774,User
6,2019,1428,2254,2087,2055,1908,2021,1671,13424,User
7,2020,2695,3808,3548,3563,3269,3237,2951,23071,User


In [404]:
victims_df.reset_index(drop=True)

Unnamed: 0,Years,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Total,User_Type
0,2014,680,1227,1119,1121,1090,989,762,6988,Victimes
1,2015,592,887,857,844,751,746,624,5301,Victimes
2,2016,504,917,940,866,863,784,601,5475,Victimes
3,2017,538,825,756,735,721,702,673,4950,Victimes
4,2018,590,1090,1000,936,987,872,772,6247,Victimes
5,2019,638,1017,906,903,848,837,678,5827,Victimes
6,2020,1351,1899,1751,1842,1661,1638,1476,11618,Victimes


In [405]:
df5 = pd.concat([users_df, victims_df], ignore_index=True)

In [406]:
df5 = df5.melt(id_vars=['Years','User_Type','Total'],value_vars=['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],var_name='Year', value_name='Number of Calls' )

In [407]:
df5.head(5)

Unnamed: 0,Years,User_Type,Total,Year,Number of Calls
0,2013,User,22987,Sunday,2283
1,2014,User,16924,Sunday,1672
2,2015,User,12898,Sunday,1350
3,2016,User,11732,Sunday,1112
4,2017,User,10703,Sunday,1161


In [408]:
df5.rename(columns={'Total':'Total_Calls'}, inplace=True)

In [409]:
df5.head(5)

Unnamed: 0,Years,User_Type,Total_Calls,Year,Number of Calls
0,2013,User,22987,Sunday,2283
1,2014,User,16924,Sunday,1672
2,2015,User,12898,Sunday,1350
3,2016,User,11732,Sunday,1112
4,2017,User,10703,Sunday,1161


In [410]:
df5.to_csv('callsbydayoftheweek.csv',index=False)

In [411]:
# Df 6
df6

Unnamed: 0.1,Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
0,Gender,,,,,,,,
1,Male,2194.0,1890.0,1446.0,1078.0,1294.0,1660.0,1445.0,2606.0
2,Female,20379.0,15014.0,11451.0,10654.0,9409.0,13114.0,11979.0,20458.0
3,N.A.,414.0,20.0,1.0,0.0,0.0,0.0,0.0,7.0
4,Total,22987.0,16924.0,12898.0,11732.0,10703.0,14774.0,13424.0,23071.0
5,,,,,,,,,
6,Nationality,,,,,,,,
7,Italian,18875.0,15240.0,11931.0,10741.0,9714.0,13590.0,12147.0,21309.0
8,Foreign,2914.0,1623.0,966.0,991.0,989.0,1183.0,1276.0,1754.0
9,N.A.,1198.0,61.0,1.0,0.0,0.0,1.0,1.0,8.0


In [412]:
df6 = df6.drop([5,11,17,23])

In [413]:
df6 = df6.rename(columns={'Unnamed: 0':'Demographics'})

In [414]:
df6

Unnamed: 0,Demographics,2013,2014,2015,2016,2017,2018,2019,2020
0,Gender,,,,,,,,
1,Male,2194.0,1890.0,1446.0,1078.0,1294.0,1660.0,1445.0,2606.0
2,Female,20379.0,15014.0,11451.0,10654.0,9409.0,13114.0,11979.0,20458.0
3,N.A.,414.0,20.0,1.0,0.0,0.0,0.0,0.0,7.0
4,Total,22987.0,16924.0,12898.0,11732.0,10703.0,14774.0,13424.0,23071.0
6,Nationality,,,,,,,,
7,Italian,18875.0,15240.0,11931.0,10741.0,9714.0,13590.0,12147.0,21309.0
8,Foreign,2914.0,1623.0,966.0,991.0,989.0,1183.0,1276.0,1754.0
9,N.A.,1198.0,61.0,1.0,0.0,0.0,1.0,1.0,8.0
10,Total,22987.0,16924.0,12898.0,11732.0,10703.0,14774.0,13424.0,23071.0


In [415]:
# Gender 
gender_df = df6.iloc[0:5]
gender_df = gender_df.dropna()
gender_df = gender_df.set_index('Demographics').reset_index()
gender_df = gender_df.T.reset_index(drop=True)
gender_df.columns.name = None # Remove the column name


In [416]:
gender_df = gender_df.rename(columns={0: 'Gender - Male', 1: 'Gender - Female', 2: 'Gender - Not Applicable', 3: 'Gender - Total'})
gender_df = gender_df.drop(0)

In [417]:
gender_df['Year'] = range(2013,2021)

In [418]:
gender_df

Unnamed: 0,Gender - Male,Gender - Female,Gender - Not Applicable,Gender - Total,Year
1,2194.0,20379.0,414.0,22987.0,2013
2,1890.0,15014.0,20.0,16924.0,2014
3,1446.0,11451.0,1.0,12898.0,2015
4,1078.0,10654.0,0.0,11732.0,2016
5,1294.0,9409.0,0.0,10703.0,2017
6,1660.0,13114.0,0.0,14774.0,2018
7,1445.0,11979.0,0.0,13424.0,2019
8,2606.0,20458.0,7.0,23071.0,2020


In [419]:
# Nationality
nationality_df = df6.iloc[6:10, :]
nationality_df = nationality_df.dropna()
nationality_df = nationality_df.set_index('Demographics').reset_index()
nationality_df = nationality_df.T.reset_index(drop=True)
nationality_df.columns.name = None # Remove the column name

In [420]:
nationality_df = nationality_df.rename(columns={0: 'Italian', 1: 'Foreign', 2: 'Nationality-Not Applicable', 3: 'Nationality Total'})
nationality_df = nationality_df.drop(0)

In [421]:
nationality_df['Year'] = range(2013,2021)

In [422]:
nationality_df

Unnamed: 0,Italian,Foreign,Nationality-Not Applicable,Nationality Total,Year
1,18875.0,2914.0,1198.0,22987.0,2013
2,15240.0,1623.0,61.0,16924.0,2014
3,11931.0,966.0,1.0,12898.0,2015
4,10741.0,991.0,0.0,11732.0,2016
5,9714.0,989.0,0.0,10703.0,2017
6,13590.0,1183.0,1.0,14774.0,2018
7,12147.0,1276.0,1.0,13424.0,2019
8,21309.0,1754.0,8.0,23071.0,2020


In [423]:
# Disability df
disability_df = df6.iloc[12:16,:]
disability_df = disability_df.dropna()
disability_df = disability_df.set_index('Demographics').reset_index()
disability_df = disability_df.T.reset_index(drop=True)
disability_df.columns.name = None # Remove the column name

In [424]:
disability_df = disability_df.rename(columns={0: 'Disability-Yes', 1: 'Disability- N/A', 2: 'Total'})
disability_df = disability_df.drop(0)

In [425]:
disability_df['Year'] = range(2013,2021)

In [426]:
disability_df

Unnamed: 0,Disability-Yes,Disability- N/A,Total,Year
1,827.0,1606.0,22987.0,2013
2,430.0,824.0,16924.0,2014
3,282.0,1719.0,12898.0,2015
4,349.0,1291.0,11732.0,2016
5,262.0,1057.0,10703.0,2017
6,314.0,478.0,14774.0,2018
7,314.0,349.0,13424.0,2019
8,970.0,5168.0,23071.0,2020


In [427]:
#C 
sexual_orientationdf = pd.DataFrame(df6.drop(index=df6.index[0:16]))
sexual_orientationdf = sexual_orientationdf.set_index('Demographics').reset_index()
sexual_orientationdf = sexual_orientationdf.T.reset_index(drop=True)
sexual_orientationdf.columns.name = None

In [428]:
sexual_orientationdf.rename(columns={0:'Hoxexual',1:'Transsexual',2:'Others',3:'Total'}, inplace=True)
sexual_orientationdf = sexual_orientationdf.drop(0)

In [429]:
sexual_orientationdf['Year'] = range(2013,2021)

In [430]:
merged_df = pd.concat([gender_df,nationality_df,disability_df,sexual_orientationdf], axis=1)

In [431]:
merged_df.to_csv('callsbydemographics.csv',index=False)

In [432]:
# Clean df 7
df7 = df7.dropna()

In [433]:
df7

Unnamed: 0.1,Unnamed: 0,Chiamate valide,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
1,2020-03-01 00:00:00,75.0,58.0,30.0,41.0,45.0,60.0,66.0,34.0
2,2020-03-02 00:00:00,58.0,39.0,50.0,33.0,39.0,65.0,48.0,63.0
3,2020-03-03 00:00:00,54.0,80.0,52.0,62.0,50.0,54.0,49.0,52.0
4,2020-03-04 00:00:00,113.0,89.0,42.0,57.0,34.0,54.0,57.0,38.0
5,2020-03-05 00:00:00,89.0,63.0,51.0,40.0,33.0,65.0,56.0,44.0
...,...,...,...,...,...,...,...,...,...
241,2020-10-27 00:00:00,71.0,77.0,58.0,53.0,51.0,34.0,41.0,75.0
242,2020-10-28 00:00:00,125.0,62.0,54.0,43.0,46.0,35.0,50.0,64.0
243,2020-10-29 00:00:00,102.0,80.0,52.0,23.0,31.0,42.0,53.0,58.0
244,2020-10-30 00:00:00,113.0,76.0,40.0,18.0,53.0,46.0,46.0,57.0


In [434]:
# Rename columns
df7 = df7.rename(columns={'Unnamed: 0':'Time Stamp','Chiamate valide':2013,'Unnamed: 2':2014,'Unnamed: 3':2015,'Unnamed: 4': 2016,'Unnamed: 5':2017,'Unnamed: 6': 2018, 'Unnamed: 7' : 2019,'Unnamed: 8' : 2020})

In [435]:
df7

Unnamed: 0,Time Stamp,2013,2014,2015,2016,2017,2018,2019,2020
1,2020-03-01 00:00:00,75.0,58.0,30.0,41.0,45.0,60.0,66.0,34.0
2,2020-03-02 00:00:00,58.0,39.0,50.0,33.0,39.0,65.0,48.0,63.0
3,2020-03-03 00:00:00,54.0,80.0,52.0,62.0,50.0,54.0,49.0,52.0
4,2020-03-04 00:00:00,113.0,89.0,42.0,57.0,34.0,54.0,57.0,38.0
5,2020-03-05 00:00:00,89.0,63.0,51.0,40.0,33.0,65.0,56.0,44.0
...,...,...,...,...,...,...,...,...,...
241,2020-10-27 00:00:00,71.0,77.0,58.0,53.0,51.0,34.0,41.0,75.0
242,2020-10-28 00:00:00,125.0,62.0,54.0,43.0,46.0,35.0,50.0,64.0
243,2020-10-29 00:00:00,102.0,80.0,52.0,23.0,31.0,42.0,53.0,58.0
244,2020-10-30 00:00:00,113.0,76.0,40.0,18.0,53.0,46.0,46.0,57.0


In [436]:
df7 = pd.melt(df7, id_vars='Time Stamp', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year',value_name='Number of  Calls',)

In [437]:
df7.head(5)

Unnamed: 0,Time Stamp,Year,Number of Calls
0,2020-03-01,2013,75.0
1,2020-03-02,2013,58.0
2,2020-03-03,2013,54.0
3,2020-03-04,2013,113.0
4,2020-03-05,2013,89.0


In [438]:
df7.to_csv('callsbytimestamp.csv', index=False)

In [439]:
df8 = df8.drop([24])

In [440]:
df8.rename(columns={'Unnamed: 0':'Region','Totale': 'Total'}, inplace=True)

In [441]:
df8

Unnamed: 0,Region,2013,2014,2015,2016,2017,2018,2019,2020,Total
0,Piemonte,1339.0,1205.0,971.0,933.0,793.0,1014.0,945.0,1355.0,8555.0
1,Valle d'Aosta,29.0,23.0,12.0,11.0,11.0,10.0,10.0,18.0,124.0
2,Liguria,584.0,510.0,365.0,343.0,228.0,314.0,309.0,422.0,3075.0
3,Lombardia,2881.0,2140.0,1724.0,1788.0,1620.0,1972.0,1920.0,2926.0,16971.0
4,Trentino-Alto Adige,125.0,91.0,95.0,91.0,54.0,111.0,117.0,175.0,859.0
5,Trento,79.0,61.0,73.0,68.0,39.0,77.0,79.0,106.0,582.0
6,Bolzano,43.0,30.0,22.0,23.0,13.0,34.0,36.0,59.0,260.0
7,Veneto,1524.0,1148.0,756.0,606.0,572.0,852.0,976.0,1283.0,7717.0
8,Friuli-Venezia Giulia,316.0,221.0,170.0,150.0,141.0,187.0,191.0,235.0,1611.0
9,Emilia-Romagna,1286.0,867.0,657.0,582.0,448.0,688.0,682.0,1151.0,6361.0


In [442]:
df8 = df8.melt(id_vars='Region', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year', value_name='Number of Calls')

In [443]:
df8.head(5)

Unnamed: 0,Region,Year,Number of Calls
0,Piemonte,2013,1339.0
1,Valle d'Aosta,2013,29.0
2,Liguria,2013,584.0
3,Lombardia,2013,2881.0
4,Trentino-Alto Adige,2013,125.0


In [444]:
df8.to_csv('CallsByRegions.csv', index=False)

In [445]:
# DF 9
df9 = df9.drop([24])

In [446]:
df9 = df9.melt(id_vars='Regions', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year', value_name='Number of Calls')

In [447]:
df9.head(5)

Unnamed: 0,Regions,Year,Number of Calls
0,Piemonte,2013,731.0
1,Valle d'Aosta,2013,19.0
2,Liguria,2013,297.0
3,Lombardia,2013,1510.0
4,Trentino-Alto Adige,2013,64.0


In [448]:
df9.to_csv('callsbyvictim.csv',index=False)

In [449]:
#DF10
df10

Unnamed: 0,Knowledge of the helpline service 1522,2013,2014,2015,2016,2017,2018,2019,2020
0,,,,,,,,,
1,Internet,9239.0,4970.0,3264.0,2422.0,1109.0,2549.0,1494.0,3243.0
2,Tv,1737.0,1367.0,1384.0,1524.0,1693.0,2366.0,3715.0,3125.0
3,Print,986.0,652.0,439.0,520.0,494.0,730.0,751.0,647.0
4,Public Service,986.0,861.0,486.0,596.0,534.0,698.0,907.0,609.0
5,Phone book,83.0,138.0,186.0,128.0,204.0,380.0,199.0,248.0
6,Brochure,51.0,56.0,13.0,54.0,45.0,77.0,115.0,174.0
7,relative / friend / acquaintance,87.0,346.0,52.0,78.0,53.0,47.0,63.0,47.0
8,Radio,25.0,7.0,10.0,18.0,27.0,23.0,33.0,28.0
9,Others,1039.0,490.0,197.0,237.0,179.0,149.0,159.0,21.0


In [450]:
# DF 10
df10 = df10.drop([0, 14])

In [451]:
df10 = df10.melt(id_vars='Knowledge of the helpline service 1522', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year', value_name='Number of Calls')

In [452]:
df10.head(5)

Unnamed: 0,Knowledge of the helpline service 1522,Year,Number of Calls
0,Internet,2013,9239.0
1,Tv,2013,1737.0
2,Print,2013,986.0
3,Public Service,2013,986.0
4,Phone book,2013,83.0


In [453]:
df10.to_csv('callsbyhelpline.csv',index=False)

In [454]:
df11 = df11.drop([45])

In [455]:
df11 = df11.melt(id_vars=['Regions','First contact (Yes/No)'],value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],var_name='Year', value_name='Number of Calls')

In [456]:
df11.head(5)

Unnamed: 0,Regions,First contact (Yes/No),Year,Number of Calls
0,,,2013,
1,Piemonte,Yes,2013,1114.0
2,,No,2013,225.0
3,Valle d'Aosta,Yes,2013,26.0
4,,No,2013,3.0


In [457]:
df11.to_csv('callsbyfirstcontat.csv',index=False)

In [458]:
# DF12
df12

Unnamed: 0.1,Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
0,Gender,,,,,,,,
1,Female,9910,6703.0,5108.0,5284.0,4767.0,6069.0,5692.0,11191.0
2,Male,784,284.0,192.0,191.0,183.0,178.0,135.0,420.0
3,N.A.,107,1.0,1.0,0.0,0.0,0.0,0.0,7.0
4,Total,10801,6988.0,5301.0,5475.0,4950.0,6247.0,5827.0,11618.0
5,,,,,,,,,
6,Age group,,,,,,,,
7,up to 17 years old,54,60.0,39.0,39.0,51.0,61.0,66.0,206.0
8,18-24,565,446.0,327.0,351.0,310.0,421.0,463.0,840.0
9,25-34,1729,1249.0,961.0,946.0,905.0,997.0,1149.0,1625.0


In [459]:
# Create seperate dataframes for categories
gender_df = df12.iloc[1:5,:]
age_df = df12.iloc[7:16, :]
martial_statusdf = df12.iloc[18:26, :]
employment_statusdf = df12.iloc[28:37, :]
education_qualificationdf = df12.iloc[39:45, :]
citizenship_df = df12.iloc[47:51, :]

In [460]:
gender_df = gender_df.melt(id_vars='Unnamed: 0',value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year', value_name='Number of Calls')

In [461]:
gender_df.rename(columns={'Unnamed: 0':'Gender'}, inplace=True)

In [462]:
gender_df.head(5)

Unnamed: 0,Gender,Year,Number of Calls
0,Female,2013,9910.0
1,Male,2013,784.0
2,N.A.,2013,107.0
3,Total,2013,10801.0
4,Female,2014,6703.0


In [463]:
# Age df
age_df

Unnamed: 0.1,Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020
7,up to 17 years old,54,60.0,39.0,39.0,51.0,61.0,66.0,206.0
8,18-24,565,446.0,327.0,351.0,310.0,421.0,463.0,840.0
9,25-34,1729,1249.0,961.0,946.0,905.0,997.0,1149.0,1625.0
10,35-44,2852,1855.0,1380.0,1399.0,1285.0,1494.0,1535.0,2026.0
11,45-54,2430,1518.0,1111.0,1215.0,974.0,1430.0,1148.0,1768.0
12,55-64,1300,767.0,606.0,621.0,484.0,671.0,591.0,1053.0
13,65 and over,1050,581.0,475.0,516.0,396.0,501.0,438.0,968.0
14,N.A.,821,512.0,402.0,388.0,545.0,672.0,437.0,3132.0
15,Total,10801,6988.0,5301.0,5475.0,4950.0,6247.0,5827.0,11618.0


In [464]:
age_df = age_df.melt(id_vars='Unnamed: 0', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [465]:
age_df.rename(columns={'Unnamed: 0':'Age Groups'}, inplace=True)

In [466]:
age_df.head(5)

Unnamed: 0,Age Groups,Year,Number of Calls
0,up to 17 years old,2013,54
1,18-24,2013,565
2,25-34,2013,1729
3,35-44,2013,2852
4,45-54,2013,2430


In [467]:
martial_statusdf = martial_statusdf.melt(id_vars='Unnamed: 0', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [468]:
martial_statusdf.rename(columns={'Unnamed: 0':'Marital_Status'}, inplace=True)

In [469]:
martial_statusdf.head(5)

Unnamed: 0,Marital_Status,Year,Number of Calls
0,Married persons (including separated persons) ...,2013,4812
1,Divorced persons,2013,521
2,Single persons (never married and never in sam...,2013,2789
3,Separated persons,2013,1337
4,Widowed persons or widow/widower of same sex c...,2013,471


In [470]:
# Employment status
employment_statusdf = employment_statusdf.melt(id_vars='Unnamed: 0', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [471]:
employment_statusdf.rename(columns={'Unnamed: 0' :'Employment_Status'}, inplace=True)

In [472]:
employment_statusdf.head(5)

Unnamed: 0,Employment_Status,Year,Number of Calls
0,Housewife,2013,1603
1,"Unemployed, seeking for job",2013,2636
2,Work illegally,2013,340
3,Employed person,2013,3722
4,Retired,2013,1332


In [473]:
# Citizen df
education_qualificationdf = education_qualificationdf.melt(id_vars='Unnamed: 0', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [474]:
education_qualificationdf.rename(columns={'Unnamed: 0' :'Education Qualification'}, inplace=True)

In [475]:
education_qualificationdf.head(5)

Unnamed: 0,Education Qualification,Year,Number of Calls
0,University degree,2013,1464
1,Diploma of upper secondary education and Certi...,2013,3955
2,Lower secondary school certificate,2013,3047
3,"Primary school certificate, no educational degree",2013,737
4,No response,2013,.


In [476]:
citizenship_df = citizenship_df.melt(id_vars='Unnamed: 0', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [477]:
citizenship_df.rename(columns={'Unnamed: 0' :'Citizenship_Status'}, inplace=True)

In [478]:
citizenship_df.head(5)

Unnamed: 0,Citizenship_Status,Year,Number of Calls
0,Italian,2013,8973.0
1,Not Italian,2013,1226.0
2,N.A.,2013,602.0
3,Total,2013,10801.0
4,Italian,2014,5666.0


In [479]:
merged_files = pd.concat([gender_df, age_df,martial_statusdf,employment_statusdf,education_qualificationdf,citizenship_df],axis=1, join='outer')

In [480]:
merged_files.to_csv('callsbysocialfactors.csv', index=False)

In [481]:
# df 13
df13

Unnamed: 0,Tipo di violenza,2013,2014,2015,2016,2017,2018,2019,2020
0,,,,,,,,,
1,Physical violence,5080.0,3306.0,2452.0,2592.0,2195.0,2851.0,2589.0,5588.0
2,Psychological violence,3468.0,2251.0,1990.0,1984.0,1729.0,2363.0,2186.0,3883.0
3,Sexual harassment with contact,350.0,289.0,112.0,191.0,258.0,256.0,274.0,654.0
4,Sexual violence,253.0,100.0,116.0,63.0,62.0,49.0,38.0,164.0
5,Economic violence,176.0,134.0,86.0,92.0,87.0,57.0,90.0,155.0
6,Mobbing,76.0,63.0,44.0,44.0,58.0,43.0,75.0,123.0
7,Threats,63.0,29.0,12.0,11.0,20.0,21.0,10.0,36.0
8,No response,310.0,297.0,195.0,236.0,252.0,243.0,210.0,390.0
9,N.A.,1025.0,519.0,294.0,262.0,289.0,364.0,355.0,625.0


In [482]:
df13 = df13.drop([0,11])

In [483]:
df13 = df13.melt(id_vars='Tipo di violenza', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [484]:
df13.to_csv('callsbyviolence.csv',index=False)

In [485]:
df14 = df14.drop([0,7])

In [486]:
df14 = df14.melt(id_vars='Violent act frequency', value_vars=[2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020], var_name='Year',value_name='Number of Calls' )

In [487]:
df14.to_csv('callsbyviolencebyfreq.csv', index=False)