# Customer Analysis Case

## **Libraries:**

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 

## Content notes:

### Files relative paths:

In [12]:
file1 = "file1.csv"
file2 = "file2.csv"
file3 = "Data_Marketing_Customer_Analysis_Round2.csv"

### Variables before cleaning:

Column names before change:\
'Customer', 'ST', 'GENDER', 'Education', 'Customer Lifetime Value','Income', 'Monthly Premium Auto', 'Number of Open Complaints','Policy Type', 'Vehicle Class', 'Total Claim Amount'

State names before change:\
'Washington', 'Arizona', 'Nevada', 'California', 'Oregon', 'Cali','AZ', 'WA', nan

In [13]:
data["state"].unique()

NameError: name 'data' is not defined

## **Functions:**

### Get data:

In [14]:
#### read a file:
def get_data_set(file1):             
    return pd.read_csv(file1)

####combine with dataframe from file2:
def concat_data_set2(data): 
    df2 = pd.read_csv("file2.csv")
    return pd.concat([data,df2])

### Data cleaning:

In [15]:
### removing duplicates:
def remove_duplicates(data):
    return data.drop_duplicates()

### renaming columns
def rename_columns(data):
    return data.rename(
        columns={"Customer":"customer",
        "ST":"state",
        "GENDER":"gender",
        "Education":"education",
        "Customer Lifetime Value":"customer_lifetime_value",
        "Monthly Premium Auto":"monthly_premium_auto",
        "Income":"income",
        "Number of Open Complaints":"number_of_open_complaints",
        "Policy Type":"policy_type",
        "Vehicle Class":"vehicule_class",
        "Total Claim Amount":"total_claim_amount"
                }
    )


### cleaning up state names
def clean_up_states(data):
    dict = {
        "Oregon": "OR",
        "California": "CA",
        "Arizona": "AZ",
        "Nevada": "NV",
        "Washington": "WA",
        "Cali": "CA"
        }
    data.replace({"State": dict})
    return data
  

Test area:

In [19]:
data["state"].value_counts()

Oregon        623
California    486
Arizona       327
Nevada        223
Washington    181
Cali          120
AZ             74
WA             30
Name: state, dtype: int64

In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2065 entries, 0 to 995
Data columns (total 11 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   customer                   2064 non-null   object 
 1   state                      2064 non-null   object 
 2   gender                     1942 non-null   object 
 3   education                  2064 non-null   object 
 4   customer_lifetime_value    2057 non-null   object 
 5   income                     2064 non-null   float64
 6   monthly_premium_auto       2064 non-null   float64
 7   number_of_open_complaints  2064 non-null   object 
 8   policy_type                2064 non-null   object 
 9   vehicule_class             2064 non-null   object 
 10  total_claim_amount         2064 non-null   float64
dtypes: float64(3), object(8)
memory usage: 193.6+ KB


## **Pipeline controller:**

In [17]:
data = (
    get_data_set(file1)    #### getting data from file1
.pipe(concat_data_set2)     #### appending data from dataset2
.pipe(remove_duplicates)    #### removing duplicates
.pipe(rename_columns)       #### renaming columns
.pipe(clean_up_states)     #### normalizing state names

)
data

Unnamed: 0,customer,state,gender,education,customer_lifetime_value,income,monthly_premium_auto,number_of_open_complaints,policy_type,vehicule_class,total_claim_amount
0,RB50392,Washington,,Master,,0.0,1000.0,1/0/00,Personal Auto,Four-Door Car,2.704934
1,QZ44356,Arizona,F,Bachelor,697953.59%,0.0,94.0,1/0/00,Personal Auto,Four-Door Car,1131.464935
2,AI49188,Nevada,F,Bachelor,1288743.17%,48767.0,108.0,1/0/00,Personal Auto,Two-Door Car,566.472247
3,WW63253,California,M,Bachelor,764586.18%,0.0,106.0,1/0/00,Corporate Auto,SUV,529.881344
4,GA49547,Washington,M,High School or Below,536307.65%,36357.0,68.0,1/0/00,Personal Auto,Four-Door Car,17.269323
...,...,...,...,...,...,...,...,...,...,...,...
991,HV85198,Arizona,M,Master,847141.75%,63513.0,70.0,1/0/00,Personal Auto,Four-Door Car,185.667213
992,BS91566,Arizona,F,College,543121.91%,58161.0,68.0,1/0/00,Corporate Auto,Four-Door Car,140.747286
993,IL40123,Nevada,F,College,568964.41%,83640.0,70.0,1/0/00,Corporate Auto,Two-Door Car,471.050488
994,MY32149,California,F,Master,368672.38%,0.0,96.0,1/0/00,Personal Auto,Two-Door Car,28.460568
