***Data processing***

Import the packages

In [101]:
import pandas as pd
import numpy as np

Load the data

In [102]:
# Folder that holds the analysis data. It is a plain string ending in a slash
# because file names are appended to it directly.
DIR = "../Analysis Data/"
survey = pd.read_csv(DIR + "earthquake_data.csv")

Get rid of invalid records (rows with at least one missing value)

In [103]:
# Find every survey record that has at least one missing answer, count the
# missing cells per column, and build a cleaned frame without those records.
col_names = list(survey.columns)

# Boolean frame, True wherever a cell is missing. DataFrame.isna applies the
# same missing-value test as pd.isna, but for the whole frame at once instead
# of visiting each cell in a Python loop.
missing_mask = survey.isna()

# Number of missing cells per column, counted on the unfiltered survey.
# int() keeps the counters as built-in ints.
n_invalid_cells = {name: int(count) for name, count in missing_mask.sum().items()}

# Index labels of rows with at least one missing cell. Each label appears once,
# in row order, even when a row has several missing cells.
records_to_drop = survey.index[missing_mask.any(axis=1)].tolist()

survey_valid_data = survey.drop(index=records_to_drop)

Get possible answers and missing values

In [104]:
# Summarise each question: the distinct answers that remain after cleaning and
# how many cells were missing in the raw survey.
n_records = len(survey.index)

for name in col_names:
    # np.unique returns the sorted distinct values. The trailing .tolist()
    # converts NumPy scalars to built-in Python values so the printed list
    # reads ['No', 'Yes'] even on NumPy versions whose scalar repr includes
    # the type name (e.g. np.str_('No')).
    possible_answers = np.unique(survey_valid_data[name].tolist()).tolist()

    print(f"Column name: {name}")
    print(f"Possible answers: {possible_answers}")
    print(f"Missing values: {n_invalid_cells[name]}/{n_records}")
    print()

Column name: In general, how worried are you about earthquakes?
Possible answers: ['Extremely worried', 'Not at all worried', 'Not so worried', 'Somewhat worried', 'Very worried']
Missing values: 0/1013

Column name: How worried are you about the Big One, a massive, catastrophic earthquake?
Possible answers: ['Extremely worried', 'Not at all worried', 'Not so worried', 'Somewhat worried', 'Very worried']
Missing values: 0/1013

Column name: Do you think the "Big One" will occur in your lifetime?
Possible answers: ['No', 'Yes']
Missing values: 0/1013

Column name: Have you ever experienced an earthquake?
Possible answers: ['No', 'Yes, one or more major ones', 'Yes, one or more minor ones']
Missing values: 7/1013

Column name: Have you or anyone in your household taken any precautions for an earthquake (packed an earthquake survival kit, prepared an evacuation plan, etc.)?
Possible answers: ['No', 'Yes']
Missing values: 7/1013

Column name: How familiar are you with the San Andreas Fault

Sort the columns

In [105]:
# Move the last four columns to the front, then sort the rows by the "Age" column.
survey_valid_data_sorted = (
    survey_valid_data
    .reindex(columns=[*col_names[-4:], *col_names[:-4]])
    .sort_values(by="Age")
)

Change the columns' names

In [106]:
# Short labels, applied by position to the columns of survey_valid_data_sorted.
new_col_names = [
    "Age",
    "Gender",
    "Household income",
    "US Region",
    "Earthquakes fear",
    "The \"Big One\" fear",
    "The \"Big One\" experiencing possible",
    "Earthquake experienced",
    "Earthquake precautions taken",
    "San Andreas Fault line familiarity",
    "Yellowstone Supervolcano familiarity",
]

# Pair each existing column name with the new label at the same position.
col_map = {
    survey_valid_data_sorted.columns[position]: label
    for position, label in enumerate(new_col_names)
}

survey_modified_columns_names = survey_valid_data_sorted.rename(columns=col_map)

# Save the renamed table without the index column.
survey_modified_columns_names.to_csv(
    path_or_buf=f"{DIR}earthquake_data_modified_column_names.csv",
    index=False,
)

Create a table containing the age, the gender, and the answer to the question "Do you think the "Big One" will occur in your lifetime?"

In [107]:
# Keep only the three columns used for the "Big One" table and save them
# without the index column.
big_one_columns = ["Age", "Gender", "The \"Big One\" experiencing possible"]
big_one_table = survey_modified_columns_names[big_one_columns]
big_one_table.to_csv(
    path_or_buf=f"{DIR}earthquake_data_big_one.csv",
    index=False,
)