In [2]:
import pandas as pd
import IPython as ip
import numpy as np

# Pre-condition: char_list is a list of character digits.
def sum_char_list(char_list):
    """Return the total of the integer values of the elements of ``char_list``.

    Pre-condition: every element of ``char_list`` is a string that ``int()``
    can parse (for example a single digit character).

    Each element is converted on its own, so ``list("12")`` totals 3 while
    ``["12"]`` totals 12. An empty sequence totals 0. An element that
    ``int()`` cannot parse raises ``ValueError``.
    """
    return sum(int(digit) for digit in char_list)

def convert_1_0_strings_to_true_false_strings(df, column_name):
    """Replace the strings "1" and "0" in one column of ``df`` with booleans.

    ``df`` is modified in place and nothing is returned. Cells equal to "1"
    become ``True`` and cells equal to "0" become ``False`` (actual booleans,
    despite the function name); every other cell is left untouched.
    """
    replacements = (("1", True), ("0", False))

    for text_value, bool_value in replacements:
        matches = df[column_name] == text_value
        df.loc[matches, column_name] = bool_value

if __name__ == "__main__":
    # Fixed-width layout: each column name is paired with its colspec so the
    # two cannot drift out of alignment. pandas.read_fwf treats each colspec
    # as a half-open [start, end) character interval.
    field_layout = [
        ("Numeric State Code", (1, 3)),
        ("ORI Code", (3, 10)),
        ("Population Group (inclusive)", (10, 12)),
        ("Division", (12, 13)),
        ("Year", (13, 15)),
        ("Metropolitan Statistical Area Number", (15, 18)),
        ("Adult Male Reported?", (18, 19)),
        ("Adult Female Reported?", (19, 20)),
        ("Juvenile Reported?", (20, 21)),
        ("Adjustment", (21, 22)),
        ("Offense Code", (22, 25)),
        ("Male Pre-Teens", (40, 58)),
        ("Male Teenagers", (58, 94)),
        ("Male Young Adults", (94, 166)),
        ("Male Adults", (166, 220)),
        ("Male Seniors", (220, 238)),
        ("Female Pre-Teens", (238, 256)),
        ("Female Teenagers", (256, 292)),
        ("Female Young Adults", (292, 364)),
        ("Female Adults", (364, 418)),
        ("Female Seniors", (418, 436)),
    ]

    # Columns holding "1"/"0" flags that become booleans.
    reported_flag_columns = [
        "Adult Male Reported?",
        "Adult Female Reported?",
        "Juvenile Reported?",
    ]

    # Columns holding digit sequences that are reduced to a single integer.
    count_columns = [
        "Male Pre-Teens",
        "Male Teenagers",
        "Male Young Adults",
        "Male Adults",
        "Male Seniors",
        "Female Pre-Teens",
        "Female Teenagers",
        "Female Young Adults",
        "Female Adults",
        "Female Seniors",
    ]

    # Import data. Everything is read as object (string) so leading zeros in
    # the fixed-width fields survive; only the first 1000 lines are read.
    original_df = pd.read_fwf(filepath_or_buffer="data/ASR122016.TXT",
                              names=[name for name, _ in field_layout],
                              dtype="object",
                              colspecs=[spec for _, spec in field_layout],
                              nrows=1000)
    print("Import data done!")

    # Remove headers: keep only the rows whose "Female Seniors" field is
    # populated. The copy keeps original_df untouched by the edits below.
    df = original_df.loc[pd.notna(original_df["Female Seniors"])].copy()
    print("Remove headers done!")

    # Assign full year name (two-digit year string prefixed with "20").
    df.loc[:, "Year"] = "20" + df["Year"]
    print("Assign full year name done!")

    # Change 1/0 to True/False.
    for column_name in reported_flag_columns:
        convert_1_0_strings_to_true_false_strings(df, column_name)
    print("Change 1/0 to True/False done!")

    # Sum sequence representing number of criminals.
    # The whole column is assigned at once: the previous element-wise
    # df[column][label] = value form is chained indexing, which pandas does
    # not guarantee to write back into df.
    # NOTE(review): sum_char_list adds the individual digit characters, so if
    # these fields pack multi-digit counts, any count of 10 or more is
    # undercounted - confirm against the file's record layout.
    for column_name in count_columns:
        df[column_name] = df[column_name].map(
            lambda digits: sum_char_list(list(digits)))
    print("Sum sequence representing number of criminals done!")

    # Data type change.
    dtype_by_column = {"Year": "int"}
    dtype_by_column.update({name: "boolean" for name in reported_flag_columns})
    dtype_by_column.update({name: "int" for name in count_columns})
    df = df.astype(dtype=dtype_by_column)
    print("Data type change done!")

    # Show every row and column of the cleaned frame.
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        ip.display.display(df)







Unnamed: 0,Numeric State Code,ORI Code,Population Group (inclusive),Division,Year,Metropolitan Statistical Area Number,Adult Male Reported?,Adult Female Reported?,Juvenile Reported?,Adjustment,Offense Code,Male Pre-Teens,Male Teenagers,Male Young Adults,Male Adults,Male Seniors,Female Pre-Teens,Female Teenagers,Female Young Adults,Female Adults,Female Seniors
0,1,ALAST00,8D,6,16,,0,,3,,0,0,000000000000000000000000000000000000,000000 0ALABAMA HIGHWAY PATROL ALA ...,0000000000000000000000000000000000000000000000...,0,,,,,
1,1,ALDI003,7,6,16,,0,,0,3.0,0,0,000000000000000000000000000000000000,000000 1POARCH CREEK TRIBAL ALA ...,0000000000000000000000000000000000000000000000...,0,,,,,
2,1,ALDI003,7,6,16,,1,0.0,0,3.0,30,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0.0
3,1,ALDI003,7,6,16,,1,0.0,0,3.0,40,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000020000000010000000000000000020000000010...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0.0
4,1,ALDI003,7,6,16,,1,1.0,0,3.0,60,0,000000000000000000000000000000000000,0000000000000000010000000000000000000000000000...,0000000050000000030000000000000000050000000060...,2,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000030000000030000000010000000040000000000...,0.0
5,1,ALDI003,7,6,16,,1,0.0,0,3.0,70,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0.0
6,1,ALDI003,7,6,16,,1,1.0,1,3.0,80,0,000000000000000002000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000010000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000010000000000000000000000000000000000000...,0.0
7,1,ALDI003,7,6,16,,1,0.0,0,3.0,100,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000010000000010000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0.0
8,1,ALDI003,7,6,16,,0,1.0,0,3.0,110,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000010000000000...,0.0
9,1,ALDI003,7,6,16,,0,1.0,0,3.0,130,0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0,0.0,000000000000000000000000000000000000,0000000000000000000000000000000000000000000000...,0000000000000000000000000000000000000000000000...,0.0
