#import environment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the travel-insurance CSV from the relative input directory into `df`,
# then preview its first rows.
df = pd.read_csv('../input/travel-insurance/travel_insurance.csv')
df.head()

In [None]:
# List the dtype pandas inferred for each column of the raw frame.
df.dtypes

In [None]:
# Convert the Yes/No and 1/0 flag columns to booleans

def change_bool_yes_no(df, column):
    """Recode a Yes/No or 1/0 column of ``df`` to booleans, in place.

    ``'Yes'`` and ``1`` become ``True``; ``'No'`` and ``0`` become ``False``.
    Any value without an entry in the lookup comes back from ``Series.map``
    as NaN.

    Args:
        df: DataFrame holding the column; it is modified in place.
        column: Label of the column to recode.

    Returns:
        The same ``df`` object that was passed in.
    """
    truth_table = {'Yes': True, 'No': False, 1: True, 0: False}
    recoded = df[column].map(truth_table)
    df[column] = recoded
    return df

# Work on a copy so the raw `df` keeps its original values.
travel_df = df.copy()

# Recode every Yes/No or 1/0 flag column to booleans (in place on the copy).
for flag_column in (
    'GraduateOrNot',
    'FrequentFlyer',
    'EverTravelledAbroad',
    'ChronicDiseases',
    'TravelInsurance',
):
    change_bool_yes_no(travel_df, flag_column)

# Drop the space from the column name so it can be used as a bare
# identifier inside DataFrame.query() expressions.
travel_df.rename(columns={'Employment Type': 'EmploymentType'}, inplace=True)

In [None]:
# Preview the cleaned copy (flag columns mapped through change_bool_yes_no,
# 'Employment Type' renamed to 'EmploymentType').
travel_df.head()

In [None]:
# Non-null count per column. Values change_bool_yes_no could not map become
# NaN, so a column that counts lower than the others would reveal them.
travel_df.count()

In [None]:
# Descriptive statistics for travel_df, using describe()'s default column
# selection.
travel_df.describe()

In [None]:
# Overall share of customers (TravelInsurance == True) vs non-customers.
# value_counts() orders its rows by frequency, while the pie labels are
# applied by position; reindexing to a fixed [False, True] order guarantees
# 'non-customer' always labels the False wedge and 'customer' the True wedge,
# whichever class happens to be larger. fill_value=0 keeps a class that is
# absent from the data as an empty wedge instead of NaN.
(
    travel_df['TravelInsurance']
    .value_counts(normalize=True)
    .reindex([False, True], fill_value=0)
    .plot.pie(labels=['non-customer', 'customer'], autopct="%1.f%%")
)
plt.title('Customer percentage')

In [None]:
# Customers are the rows whose TravelInsurance flag is True.
cus_df = travel_df.query('TravelInsurance == True')

# Pie of how those customers split across employment types.
(
    cus_df
    .groupby('EmploymentType')['TravelInsurance']
    .count()
    .plot.pie(autopct='%1.f%%')
)
plt.title('Which sector is buying insurance more than other')

In [None]:
# Number of rows per employment type (customers and non-customers together),
# i.e. the size of each sector in the data set.
travel_df.groupby('EmploymentType')['EmploymentType'].count()

In [None]:
# Side-by-side pies: customer share inside each employment sector.
plt.figure(figsize=(10, 5))
label = ['non-customer', 'customer']
# value_counts() sorts by frequency, and each sector may have a different
# majority class. The labels are applied by position, so pin the row order
# to [False, True] to keep every wedge paired with the right label.
class_order = [False, True]

## pie chart for government sector
plt.subplot(1, 2, 1)
gov_df = travel_df.query('EmploymentType == "Government Sector"')
ax1 = (
    gov_df['TravelInsurance']
    .value_counts(normalize=True)
    .reindex(class_order, fill_value=0)
    .plot.pie(labels=label, autopct='%1.f%%')
)
plt.title('Government Sector')

## pie chart for private sector
plt.subplot(1, 2, 2)
pri_df = travel_df.query('EmploymentType == "Private Sector/Self Employed"')
ax2 = (
    pri_df['TravelInsurance']
    .value_counts(normalize=True)
    .reindex(class_order, fill_value=0)
    .plot.pie(labels=label, autopct='%1.f%%')
)
plt.title('Private Sector')

plt.suptitle('Compared customer percent per sector')

In [None]:
# Column totals for non-customers (False) and customers (True).
# numeric_only=True keeps the numeric and boolean columns and leaves out text
# columns such as EmploymentType; without it, pandas >= 2.0 "sums" strings by
# concatenating every value in the group into one long cell.
travel_df.groupby('TravelInsurance').sum(numeric_only=True)

In [None]:
# Another look at the first rows of travel_df.
travel_df.head()

In [None]:
# Customers per (FrequentFlyer, EverTravelledAbroad) combination.
cus_bar = (
    cus_df
    .groupby(['FrequentFlyer', 'EverTravelledAbroad'])['TravelInsurance']
    .count()
)
# Show the counts as a flat table; cus_bar itself keeps its MultiIndex.
cus_bar.reset_index()

In [None]:
# Non-customers: rows whose TravelInsurance flag compares equal to 0 (False).
non_cus_df = travel_df.query('TravelInsurance == 0')

# Non-customers per (FrequentFlyer, EverTravelledAbroad) combination.
non_cus_bar = (
    non_cus_df
    .groupby(['FrequentFlyer', 'EverTravelledAbroad'])['TravelInsurance']
    .count()
)
# Show the counts as a flat table; non_cus_bar itself keeps its MultiIndex.
non_cus_bar.reset_index()

In [None]:
# Grouped bar chart: customers vs non-customers for each travel habit.
# Tick labels, in (FrequentFlyer, EverTravelledAbroad) order:
# (False, False), (False, True), (True, False), (True, True).
x = ['no-flight,no-travel', 'no-flight,travel', 'flight,no-travel', 'flight,travel']
X_axis = np.arange(len(x))

# Align both count series to exactly those four combinations. If a
# combination is missing from either group, the bar heights would otherwise
# no longer match the four tick positions; fill_value=0 plots it as an
# empty bar instead.
habit_index = pd.MultiIndex.from_product(
    [[False, True], [False, True]],
    names=['FrequentFlyer', 'EverTravelledAbroad'],
)
cus_counts = cus_bar.reindex(habit_index, fill_value=0)
non_cus_counts = non_cus_bar.reindex(habit_index, fill_value=0)

# Two 0.4-wide bars per tick, shifted left/right so they sit side by side.
plt.bar(X_axis - 0.2, cus_counts, 0.4, label='customer')
plt.bar(X_axis + 0.2, non_cus_counts, 0.4, label='non-customer')

plt.xticks(X_axis, x, rotation=45)
plt.title('Number of customer and non-customer for each travel habit')
plt.legend()
plt.show()

In [None]:
## Select the customers who are both frequent flyers and have travelled abroad.
## The explicit copy makes the selection an independent frame.
cus_by_fight_travel = travel_df.query(
    'FrequentFlyer == True & EverTravelledAbroad == True & TravelInsurance == True'
).copy()
cus_by_fight_travel.head()

In [None]:
# Frequent-flyer / travelled-abroad customers grouped by age: mean income,
# mean family size and number of customers (every row in this segment has
# TravelInsurance == True, so the sum is a row count).
age_cus_by_fight_travel = (
    cus_by_fight_travel
    .groupby('Age')
    .agg({
        'AnnualIncome': 'mean',
        'FamilyMembers': 'mean',
        'TravelInsurance': 'sum',
    })
    .reset_index()
)
# Display with the most common ages first; the stored frame stays unsorted.
age_cus_by_fight_travel.sort_values('TravelInsurance', ascending=False)

In [None]:
# Frequent-flyer / travelled-abroad customers grouped by family size: mean
# income, mean age and number of customers, largest groups first.
fam_cus_by_fight_travel = (
    cus_by_fight_travel
    .groupby('FamilyMembers')
    .agg({
        'AnnualIncome': 'mean',
        'Age': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values('TravelInsurance', ascending=False)
    .reset_index()
)
fam_cus_by_fight_travel

In [None]:
# Frequent-flyer / travelled-abroad customers grouped by employment type:
# mean age, income and family size plus number of customers, largest first.
employ_cus_by_fight_travel = (
    cus_by_fight_travel
    .groupby('EmploymentType')
    .agg({
        'Age': 'mean',
        'AnnualIncome': 'mean',
        'FamilyMembers': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values('TravelInsurance', ascending=False)
    .reset_index()
)
employ_cus_by_fight_travel

In [None]:
# Target group: the private-sector / self-employed row of the per-sector summary.
target_group = employ_cus_by_fight_travel.query(
    'EmploymentType == "Private Sector/Self Employed"'
)
target_group

In [None]:
# Which age has the most customers: per-age mean income, mean family size
# and customer count over all customers, largest count first.
(
    cus_df
    .groupby('Age')
    .agg({
        'AnnualIncome': 'mean',
        'FamilyMembers': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values('TravelInsurance', ascending=False)
)

In [None]:
# Relation between age and insurance bought.
# Built from the full travel_df, so the TravelInsurance sum is the number of
# True flags (policies bought) at each age.
cus_by_age = (
    travel_df
    .groupby('Age')
    .agg({
        'AnnualIncome': 'mean',
        'FamilyMembers': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values('TravelInsurance', ascending=False)
    .reset_index()
)

# Scatter plus linear fit, without a confidence band.
sns.regplot(x='Age', y='TravelInsurance', data=cus_by_age, ci=None)
plt.title('Relation between Age and Insurance bought')

In [None]:
# Does family size affect insurance purchases?
# Customers only, grouped by family size: mean age, mean income and number
# of customers, largest groups first.
# NOTE(review): these are raw customer counts, not purchase rates; family
# sizes that are simply more common will also show more customers.
cus_by_fam = (
    cus_df
    .groupby('FamilyMembers')
    .agg({
        'Age': 'mean',
        'AnnualIncome': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values('TravelInsurance', ascending=False)
    .reset_index()
)
cus_by_fam

In [None]:
# Linear fit of customer count against family size (one point per family
# size in cus_by_fam), without a confidence band.
sns.regplot(
    x='FamilyMembers',
    y='TravelInsurance',
    data=cus_by_fam,
    ci=None,
)
plt.title('Relation between Family members and Insurance')

In [None]:
# Linear fit of customers' mean annual income against family size, without a
# confidence band.
sns.regplot(
    x='FamilyMembers',
    y='AnnualIncome',
    data=cus_by_fam,
    ci=None,
)
plt.title('Relation between Family members and Income')

In [None]:
# Bar view of the same data: customers' mean annual income per family size.
sns.barplot(
    x='FamilyMembers',
    y='AnnualIncome',
    data=cus_by_fam,
)
plt.show()

In [None]:
# Rebuild cus_by_age from customers only: per-age mean income, mean family
# size and customer count, ordered by count and then by income (both
# descending). This replaces the earlier cus_by_age built from travel_df.
cus_by_age = (
    cus_df
    .groupby('Age')
    .agg({
        'AnnualIncome': 'mean',
        'FamilyMembers': 'mean',
        'TravelInsurance': 'sum',
    })
    .sort_values(['TravelInsurance', 'AnnualIncome'], ascending=False)
    .reset_index()
)
cus_by_age

In [None]:
# Number of customers per family size, as bars.
sns.barplot(
    x='FamilyMembers',
    y='TravelInsurance',
    data=cus_by_fam,
)