# The Airports case study for Section 1 
## Lauren Wright and Nyla Butler

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Load the TSA claims data from the CSV file in the working directory.
tsa_claims = pd.read_csv(filepath_or_buffer='tsa_claims1.csv')

In [None]:
# Preview the first five rows of the loaded data.
tsa_claims.head(n=5)

In [None]:
# Count the distinct non-null values in each column.
tsa_claims.nunique(axis=0, dropna=True)

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, memory usage).
tsa_claims.info(verbose=None)

## What is the most common type of insurance claim?

In [None]:
# Most frequent claim type; mode() returns every value tied for first place.
tsa_claims['Claim Type'].mode(dropna=True)

In [None]:
# Tally the claims per claim type as a two-column table:
# 'Claim Type' (the category) and 'count' (number of claims).
claim_type_counts = (
    tsa_claims['Claim Type']
    .value_counts()
    .rename_axis('Claim Type')
    .reset_index(name='count')
)

In [None]:
# Bar chart of the number of claims for each claim type.
plt.figure(figsize=(10, 6))
claim_type_ax = sns.barplot(
    x='Claim Type',
    y='count',
    data=claim_type_counts,
    palette='viridis',
)
claim_type_ax.set_title('Most Common Type of Insurance Claim')
claim_type_ax.set_xlabel('Claim Type')
claim_type_ax.set_ylabel('Number of Claims')
# Rotate the category labels on the x axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()

## At which claim site within the airport are claims most commonly filed?

In [None]:
# Tally the claims per claim site as a two-column table:
# 'Claim Site' (the location) and 'count' (number of claims).
claim_site_counts = (
    tsa_claims['Claim Site']
    .value_counts()
    .rename_axis('Claim Site')
    .reset_index(name='count')
)

In [None]:
# Bar chart of the number of claims filed for each claim site.
plt.figure(figsize=(10, 6))
claim_site_ax = sns.barplot(
    x='Claim Site',
    y='count',
    data=claim_site_counts,
    palette='viridis',
)
claim_site_ax.set_title('Most Common Claim Site Within the Airport')
claim_site_ax.set_xlabel('Claim Site')
claim_site_ax.set_ylabel('Number of Claims')
# Rotate the category labels on the x axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()

## What type of claim is made most often at each claim site?

In [None]:
def _most_frequent(values):
    """Return the most frequent non-null value in *values*, or None if there is none."""
    modes = values.mode()
    # mode() ignores nulls, so a claim site whose claim types are all missing
    # yields an empty result; indexing that with [0] would raise.
    return modes.iloc[0] if not modes.empty else None


# One row per claim site with the claim type filed there most often.
# When several types tie, the first one in mode()'s sorted output is kept.
most_common_claims = (
    tsa_claims.groupby('Claim Site')['Claim Type']
    .agg(_most_frequent)
    .reset_index()
)
# Display the table so the cell actually answers the question above.
most_common_claims

In [None]:
# Number of claims for every (claim site, claim type) combination that
# occurs in the data, flattened into columns 'Claim Site', 'Claim Type', 'count'.
claim_type_site_counts = (
    tsa_claims.groupby(['Claim Site', 'Claim Type'])
    .size()
    .rename('count')
    .reset_index()
)

In [None]:
# Grouped bar chart: one cluster per claim site, one bar per claim type.
plt.figure(figsize=(10, 6))
site_type_ax = sns.barplot(
    x='Claim Site',
    y='count',
    hue='Claim Type',
    data=claim_type_site_counts,
    palette='coolwarm',
)
site_type_ax.set_title('Type of Claims at Each Claim Site')
site_type_ax.set_xlabel('Claim Site')
site_type_ax.set_ylabel('Number of Claims')
# Rotate the category labels on the x axis by 45 degrees.
plt.xticks(rotation=45)
site_type_ax.legend(title='Claim Type')
plt.show()

## What is the typical claim amount? 

In [None]:
# 'Claim Amount' may be stored as text (e.g. "$1,234.56") rather than as
# numbers -- TODO confirm against the CSV. Strip currency formatting before
# converting; anything that still is not a number becomes NaN and is dropped.
raw_claim_amounts = tsa_claims['Claim Amount']
if not pd.api.types.is_numeric_dtype(raw_claim_amounts):
    raw_claim_amounts = raw_claim_amounts.astype(str).str.replace(r'[$,\s]', '', regex=True)
claim_amounts = pd.to_numeric(raw_claim_amounts, errors='coerce').dropna()

# Summary statistics answer the question directly; the '50%' row is the
# median, which is less sensitive to a few very large claims than the mean.
print(claim_amounts.describe())

# Restrict the histogram to the lowest 95% of amounts so that extreme values
# do not squeeze the rest of the distribution into a single bar.
upper_limit = claim_amounts.quantile(0.95)
median_amount = claim_amounts.median()

plt.figure(figsize=(6, 6))
plt.hist(claim_amounts[claim_amounts <= upper_limit], bins=50)
plt.axvline(median_amount, color='red', linestyle='--', label=f'Median: {median_amount:,.2f}')
plt.title('Distribution of Claim Amounts (lowest 95%)')
plt.xlabel('Claim Amount')
plt.ylabel('Number of Claims')
plt.legend()
plt.show()

## What is the overall claim approval rate for the entire U.S.? 

In [None]:
# Share of all claims that falls under each value of the 'Status' column.
approval_rate = (
    tsa_claims['Status']
    .value_counts(normalize=True)
    .rename_axis('approval_status')
    .reset_index(name='proportion')
)

# Bar chart of those shares, one bar per status.
plt.figure(figsize=(13, 6))
status_ax = sns.barplot(
    x='approval_status',
    y='proportion',
    data=approval_rate,
    palette='Set2',
)
status_ax.set_title('Overall Claim Approval Rate')
status_ax.set_xlabel('Status')
status_ax.set_ylabel('Proportion')
plt.show()