In [1]:
# Module Importations
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Constants
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and newer
# releases no longer accept the old names, so pick whichever spelling this install
# provides (falling back to the default style if neither is present).
WHITEGRID_STYLES = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
plt.style.use(
    next((name for name in WHITEGRID_STYLES if name in plt.style.available), "default")
)
plt.rc("figure", autolayout = True)
plt.rc(
    "axes",
    labelweight = "bold",
    labelsize = "large",
    titleweight = "bold",
    titlesize = 14,
    titlepad = 10,
)

In [3]:
# Load Datasets
# Every dataset is read from the same folder; edit DATA_DIR (and nothing else)
# to run this notebook against a different location.
DATA_DIR = Path(r'C:\Developer\scratch-pad-python\Datasets')

accidents = pd.read_csv(DATA_DIR / 'US_Accidents_Dec20.csv')
autos = pd.read_csv(DATA_DIR / 'Automobile_data.csv')
concrete = pd.read_excel(DATA_DIR / 'Concrete_Data.xls')
customer = pd.read_csv(DATA_DIR / 'Customer-Value-Analysis.csv')

In [4]:
# Print each column's name, non-null count, and dtype for the automobile dataset
autos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  205 non-null    object 
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   aspiration         205 non-null    object 
 5   num-of-doors       205 non-null    object 
 6   body-style         205 non-null    object 
 7   drive-wheels       205 non-null    object 
 8   engine-location    205 non-null    object 
 9   wheel-base         205 non-null    float64
 10  length             205 non-null    float64
 11  width              205 non-null    float64
 12  height             205 non-null    float64
 13  curb-weight        205 non-null    int64  
 14  engine-type        205 non-null    object 
 15  num-of-cylinders   205 non-null    object 
 16  engine-size        205 non

In [5]:
# Mathematical transforms: derive new features from known formulas that relate existing columns

# Deriving stroke ratio
def return_stroke_ratio(stroke, bore):
    """Return the stroke ratio (stroke divided by bore) of one engine.

    Parameters
    ----------
    stroke, bore : str or number
        Values that ``float()`` can convert.

    Returns
    -------
    float or int
        ``stroke / bore``, or ``0`` when either value cannot be converted to
        a float or when ``bore`` is zero.
    """
    try:
        return float(stroke) / float(bore)
    except (TypeError, ValueError, OverflowError, ZeroDivisionError):
        # Only conversion/division failures mean "no ratio available". A bare
        # ``except`` would also swallow KeyboardInterrupt and make a long
        # row-wise apply impossible to interrupt.
        return 0

# Research identifies stroke ratio as useful parameter, so store it as a feature column
autos['stroke_ratio'] = [
    return_stroke_ratio(stroke_value, bore_value)
    for stroke_value, bore_value in zip(autos['stroke'], autos['bore'])
]

# Preview the two source columns next to the derived one
autos[['stroke', 'bore', 'stroke_ratio']].head()

Unnamed: 0,stroke,bore,stroke_ratio
0,2.68,3.47,0.772334
1,2.68,3.47,0.772334
2,3.47,2.68,1.294776
3,3.4,3.19,1.065831
4,3.4,3.19,1.065831


In [8]:
# Mathematical transforms: derive new features from known formulas that relate existing columns

# Deriving displacement
# Spelled-out cylinder counts mapped to their numeric value.
_CYLINDER_COUNTS = {'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'eight': 8, 'ten': 10, 'twelve': 12}


def return_displacement(stroke, bore, num_of_cylinders):
    """Return total engine displacement: pi * (bore / 2)**2 * stroke * cylinders.

    Parameters
    ----------
    stroke, bore : str or number
        Values that ``float()`` can convert.
    num_of_cylinders : str or number
        Either a spelled-out count found in ``_CYLINDER_COUNTS`` (e.g.
        ``'four'``) or anything ``float()`` can convert (e.g. ``4``, ``'4'``).

    Returns
    -------
    float or int
        The displacement, expressed in the cube of whatever length unit
        ``bore`` and ``stroke`` use, or ``0`` when any input cannot be
        interpreted.
    """
    try:
        # Convert data types
        stroke = float(stroke)
        bore = float(bore)

        # Translate the cylinder count to a number. Look the word up directly:
        # iterating a dict yields only its keys, so unpacking "key, value" from
        # it raises instead of performing the translation.
        if isinstance(num_of_cylinders, str):
            word = num_of_cylinders.strip().lower()
            cylinders = _CYLINDER_COUNTS[word] if word in _CYLINDER_COUNTS else float(word)
        else:
            cylinders = float(num_of_cylinders)

        # Calculate displacement: cross-section area of one cylinder times
        # stroke length, times the number of cylinders
        displacement = np.pi * ((0.5 * bore) ** 2) * stroke * cylinders

    # Handle conversions that fail; other exceptions (e.g. KeyboardInterrupt)
    # are deliberately left to propagate
    except (TypeError, ValueError, OverflowError):
        displacement = 0

    return displacement

# Displacement can also be a useful parameter, so store it as a feature column
autos['displacement'] = [
    return_displacement(stroke_value, bore_value, cylinder_label)
    for stroke_value, bore_value, cylinder_label in zip(
        autos['stroke'], autos['bore'], autos['num-of-cylinders']
    )
]

# Preview the inputs next to the derived column
autos[['stroke', 'bore', 'num-of-cylinders', 'displacement']].head()

KeyboardInterrupt: 