In [9]:
# Module Importations
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [10]:
# Constants
# Matplotlib 3.6 renamed the bundled seaborn style sheets with a
# "seaborn-v0_8-" prefix and later releases dropped the old names, so use
# whichever whitegrid name this Matplotlib install actually provides.
# Falls back to the legacy name when neither is listed.
WHITEGRID_STYLE = next(
    (
        style_name
        for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
        if style_name in plt.style.available
    ),
    "seaborn-whitegrid",
)
plt.style.use(WHITEGRID_STYLE)

# Let every figure fit its contents automatically.
plt.rc("figure", autolayout = True)

# Default look for axis labels and titles.
plt.rc(
    "axes",
    labelweight = "bold",
    labelsize = "large",
    titleweight = "bold",
    titlesize = 14,
    titlepad = 10,
)

In [11]:
# Load Datasets
# Keep the dataset folder in one place so it only has to be edited once
# when the notebook is run on another machine.
DATA_DIR = Path(r'C:\Developer\scratch-pad-python\Datasets')

accidents = pd.read_csv(DATA_DIR / 'US_Accidents_Dec20.csv')
autos = pd.read_csv(DATA_DIR / 'Automobile_data.csv')
concrete = pd.read_excel(DATA_DIR / 'Concrete_Data.xls')
customer = pd.read_csv(DATA_DIR / 'Customer-Value-Analysis.csv')

In [12]:
# Mathematical Transforms - Relationships in mathematical forms

# Deriving stroke ratio
def return_stroke_ratio(stroke, bore):
    """Return the stroke ratio, i.e. ``stroke / bore``.

    Both arguments are passed through ``float()`` first, so numeric strings
    are accepted as well as numbers.

    Args:
        stroke: Stroke value (number or numeric string).
        bore: Bore value (number or numeric string).

    Returns:
        ``stroke / bore`` as a float, or ``0`` when either value cannot be
        converted to a float or the division fails (e.g. ``bore`` is zero).
    """
    try:
        return float(stroke) / float(bore)
    except (TypeError, ValueError, ZeroDivisionError, OverflowError):
        # Only conversion/arithmetic failures are treated as "no ratio".
        # A bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        return 0

# Research identifies stroke ratio as a useful parameter, so derive it row by row.
stroke_ratio_per_row = autos.apply(
    lambda car: return_stroke_ratio(car['stroke'], car['bore']),
    axis = 1,
)
autos['stroke_ratio'] = stroke_ratio_per_row

# Preview the inputs next to the derived column.
stroke_ratio_preview_columns = ['stroke', 'bore', 'stroke_ratio']
autos[stroke_ratio_preview_columns].head()

Unnamed: 0,stroke,bore,stroke_ratio
0,2.68,3.47,0.772334
1,2.68,3.47,0.772334
2,3.47,2.68,1.294776
3,3.4,3.19,1.065831
4,3.4,3.19,1.065831


In [13]:
# Mathematical Transforms - Relationships in mathematical forms

# Deriving displacement
# Spelled-out cylinder counts mapped to their integer values.
_CYLINDER_COUNTS = {
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "eight": 8,
    "ten": 10,
    "twelve": 12,
}


def return_displacement(stroke, bore, num_of_cylinders):
    """Return the displacement derived from bore, stroke and cylinder count.

    Computed as ``pi * (bore / 2) ** 2 * stroke * cylinders``.

    Args:
        stroke: Stroke value (number or numeric string).
        bore: Bore value (number or numeric string).
        num_of_cylinders: Cylinder count spelled out as a word, e.g. ``"four"``.

    Returns:
        The displacement as a float. The result is ``0`` when
        ``num_of_cylinders`` is not one of the known words, or when ``stroke``
        or ``bore`` cannot be converted to a float.
    """
    try:
        stroke = float(stroke)
        bore = float(bore)

        # Unknown words count as 0 cylinders, which makes the displacement 0.
        cylinders_int = _CYLINDER_COUNTS.get(num_of_cylinders, 0)

        return np.pi * ((0.5 * bore) ** 2) * stroke * cylinders_int
    except (TypeError, ValueError, OverflowError):
        # Only conversion/arithmetic failures are treated as "no displacement".
        # A bare ``except`` would also hide KeyboardInterrupt and SystemExit.
        return 0

# Displacement can also be a useful parameter, so derive it row by row.
displacement_per_row = autos.apply(
    lambda car: return_displacement(car['stroke'], car['bore'], car['num-of-cylinders']),
    axis = 1,
)
autos['displacement'] = displacement_per_row

# Preview the inputs next to the derived column.
displacement_preview_columns = ['stroke', 'bore', 'num-of-cylinders', 'displacement']
autos[displacement_preview_columns].head()

Unnamed: 0,stroke,bore,num-of-cylinders,displacement
0,2.68,3.47,four,101.377976
1,2.68,3.47,four,101.377976
2,3.47,2.68,six,117.446531
3,3.4,3.19,four,108.695147
4,3.4,3.19,five,135.868934


In [18]:
# Print a summary of the accidents frame: its index range plus each column's name and dtype.
accidents.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4232541 entries, 0 to 4232540
Data columns (total 50 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   ID                     object 
 1   Source                 object 
 2   TMC                    float64
 3   Severity               int64  
 4   Start_Time             object 
 5   End_Time               object 
 6   Start_Lat              float64
 7   Start_Lng              float64
 8   End_Lat                float64
 9   End_Lng                float64
 10  Distance(mi)           float64
 11  Description            object 
 12  Number                 float64
 13  Street                 object 
 14  Side                   object 
 15  City                   object 
 16  County                 object 
 17  State                  object 
 18  Zipcode                object 
 19  Country                object 
 20  Timezone               object 
 21  Airport_Code           object 
 22  Weather_Timestamp 

In [17]:
# Visualising Data

# Log-transform the accidents wind speed.
# The feature contains 0.0 values, so use np.log1p (log(1 + x)) rather than np.log.
wind_speed_mph = accidents['Wind_Speed(mph)']
accidents['LogWindSpeed'] = wind_speed_mph.apply(np.log1p)

accidents.head()

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight,LogWindSpeed
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,Night,Night,Night,Night,
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,Night,Night,Night,Day,
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,True,False,Night,Night,Day,Day,1.504077
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,Night,Day,Day,Day,1.722767
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,True,False,Day,Day,Day,Day,1.504077
