In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Load seaborn's "car_crashes" example dataset and bind it to `car`,
# the frame that the cells below read from.
car = sns.load_dataset("car_crashes")

## Finding meaning in the car_crashes dataset

*Defining the names of the column heads*
1. `total`: This represents the total number of car crashes that occurred in a particular state.
   This includes all types of crashes.
2. `speeding`: This represents the number of car crashes in which speeding was a contributing factor.
3. `alcohol`: This represents the number of car crashes in which alcohol was a contributing factor.
4. `not_distracted`: This represents the number of car crashes that occurred without the involvement of driver distraction.
5. `no_previous`: This represents the number of car crashes involving drivers who had no previous incidents or crashes on their record.
6. `ins_premium`: This refers to "insurance premium". It refers to the amount of money an individual or business pays for an insurance policy.
7. `ins_losses`: This refers to "insurance losses". It refers to the average insurance losses incurred per insured driver in a given state.
8. `abbrev`: This is the abbreviation of the U.S. state (or district, e.g. `DC`) that the row describes.

## Data Understanding
* head
* dtypes
* shape
* describe

In [None]:
# Preview the first five rows to get a feel for the columns and their values.
car.head(n=5)

In [None]:
# What are the data types of the various columns?

car.dtypes

In [None]:
# What is the shape of the dataset? `shape` is a (rows, columns) tuple.
car.shape

In [None]:
# What do the summary statistics of the car_crashes dataset look like?
car.describe()

## Asking Questions

In [None]:
# 1. Which row has the highest value in the `total` column?
# idxmax() yields the index label of the maximum; .loc then pulls that whole row.
row_label_max = car['total'].idxmax()
car.loc[row_label_max]

In [None]:
# 2. Which row has the lowest value in the `total` column?
# idxmin() yields the index label of the minimum; .loc then pulls that whole row.
row_label_min = car['total'].idxmin()
car.loc[row_label_min]

In [None]:
# 3. Which states have the highest and lowest number of total car crashes?
# Look up the index label of each extreme once, then read both fields from that row.
idx_of_max = car['total'].idxmax()
idx_of_min = car['total'].idxmin()

state_max = car.loc[idx_of_max, 'abbrev']
highest_crashes = car.loc[idx_of_max, 'total']

state_min = car.loc[idx_of_min, 'abbrev']
lowest_crashes = car.loc[idx_of_min, 'total']

print(f"{state_max} has the highest car crashes with {highest_crashes}")
print(f"{state_min} has the lowest car crashes with {lowest_crashes}")

In [None]:
# 4. Top 10 states with the highest payment of insurances.
# Rank every row by `ins_losses` (largest first), then keep the first ten.
ranked_by_losses = car.sort_values(by='ins_losses', ascending=False)
highest_ins = ranked_by_losses.iloc[:10]

columns_to_show = ['abbrev', 'ins_losses']
print(highest_ins[columns_to_show])

In [None]:
# 5. What is the correlation between alcohol consumption and the number of crashes?
# Series.corr is called with its default method on the `alcohol` and `total` columns.
alcohol_col = car['alcohol']
total_col = car['total']
correlation = alcohol_col.corr(total_col)

print(f"The correlation between alcohol consumption and the number of crashes is: {correlation}")

In [None]:
# 6. Filter the dataset with insurance premium greater than 900
# The cutoff is a named constant so the question and the comparison stay in
# agreement (the comparison used 1000 while the question asked for 900).
PREMIUM_THRESHOLD = 900

filtered_ins = car[car["ins_premium"] > PREMIUM_THRESHOLD]
# Bare last expression so the notebook renders the frame as a table.
filtered_ins

In [None]:
# 7. Filter using AND (&) / OR (|)

# Keep the rows where ins_losses is greater than 150 AND total is less than 16.
# Each comparison sits in its own parentheses because & binds more tightly
# than < and > in Python.
filter_ins_total = car[(car["ins_losses"] > 150) & (car["total"] < 16)]
filter_ins_total

In [None]:
# 8. Filter using isin
# Keep only the rows whose "abbrev" is one of "DC", "NY", "LA" or "CA".
wanted_states = ["DC", "NY", "LA", "CA"]
state_mask = car["abbrev"].isin(wanted_states)

filter_states = car.loc[state_mask]
filter_states