## Lab - Data Cleaning using SQL

Please re-apply the data cleaning steps that you have performed previously(using Pandas)  on the Customer Analysis Case Study using SQL this time. You can find the previous lab in this [Link](https://github.com/raafat-hantoush/IH_RH_DA_FT_AUG_2021/blob/main/Class%20Materials/Pandas/Labs/Customer_Analysis_Case_Study/Activities.md)

You can find the csv file that contains data  in this  [Link](https://github.com/raafat-hantoush/IH_RH_DA_FT_AUG_2021/blob/main/Class%20Materials/Pandas/Labs/Customer_Analysis_Case_Study/Data_Marketing_Customer_Analysis_Round2.csv).

Once You finish cleaning and preparing  the data, please load  the cleaned data into a Pandas data Frame.

Hint: you can use Stored Procedures or Functions to organise your SQL operations.


In [None]:
select *
from round2;

#-----------changing names---------------
ALTER TABLE round2
CHANGE State state VARCHAR(30),
CHANGE `Customer Lifetime Value` customer_lifetime_value DOUBLE,
CHANGE `Effective To Date` effective_to_date TEXT,
CHANGE `Marital Status` marital_status TEXT,
CHANGE `Monthly Premium Auto` monthly_premium_auto INT,
CHANGE `Months Since Last Claim` months_since_last_claim TEXT,
CHANGE `Months Since Policy Inception` months_since_policy_inception INT,
CHANGE `Number Of Open Complaints` number_of_open_complaints TEXT,
CHANGE `Number of Policies` number_of_policies INT,
CHANGE `Policy Type` policy_type TEXT,
CHANGE `Renew Offer Type` renew_offer_type TEXT,
CHANGE `Sales Channel` sales_channel TEXT,
CHANGE `Total Claim Amount` total_claim_amount DOUBLE,
CHANGE `Vehicle Class` vehicle_class TEXT,
CHANGE `Vehicle Size` vehicle_size TEXT,
CHANGE `Vehicle Type` vehicle_type TEXT,
CHANGE COLUMN `Response` `response` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `Coverage` `coverage` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `Education` `education` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `EmploymentStatus` `employment_status` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `Gender` `gender` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `Income` `income` INT NULL DEFAULT NULL ,
CHANGE COLUMN `Location Code` `location_code` TEXT NULL DEFAULT NULL ,
CHANGE COLUMN `Policy` `policy` TEXT NULL DEFAULT NULL;



# ----------droping columns-------------
ALTER TABLE round2 #change first column to a meaningful name
RENAME COLUMN MyUnknownColumn TO customer_id;
 
ALTER TABLE round2 #delete column customer
DROP COLUMN Customer;


#----------- replacing empty values---------   
update round2
set months_since_last_claim = (select avg(months_since_last_claim))
where months_since_last_claim = "";

update round2
set number_of_open_complaints = 0
where number_of_open_complaints not in (1.0, 2.0, 3.0, 4.0, 5.0);

#select if(char_length(months_since_last_claim) > 0, months_since_last_claim, 0)
#from customer_analysis


#-----------finding duplicates-------------
select COUNT(*)
FROM round2
GROUP BY customer_id, state,customer_lifetime_value, response, coverage,education, effective_to_date, employmentstatus, gender,income,location_code,marital_status, monthly_premium_auto, months_since_last_claim, months_since_policy_inception, number_of_policies,policy_type, policy, renew_offer_type,sales_channel,vehicle_class, vehicle_size
HAVING COUNT(*) > 1;

-- Check the data types of all the columns and fix the incorrect ones (for ex. customer lifetime --

ALTER TABLE round2
CHANGE COLUMN `customer_lifetime_value` `customer_lifetime_value` INT NULL DEFAULT NULL ;

-- Discontinue safe update mode -- 
SET SQL_SAFE_UPDATES = 0;

-- Replacing null values --

SELECT number_of_open_complaints FROM round2
WHERE number_of_open_complaints NOT IN (1,2,3,4,5);

UPDATE round2
SET number_of_open_complaints = 0
WHERE number_of_open_complaints NOT IN (1,2,3,4,5);

-- Filling mising values with other values --

#TBA


-- Selecting only unique and dropping duplicate rows with ROW_NUMBER function --

create table cs_study2
WITH CTE AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY 
state,
customer_lifetime_value,
response,
coverage,
education,
effective_to_date,
gender,
income,
location_code,
marital_status,
monthly_premium_auto,
months_since_last_claim,
months_since_policy_inception, 
number_of_open_complaints,
number_of_policies,
policy_type,
policy,
renew_offer_type,
sales_channel,
total_claim_amount,
vehicle_class,
vehicle_size,
vehicle_type

			
ORDER BY
state,
customer_lifetime_value,
response,
coverage

            ) row_num
		FROM round2
        ) 
        
        SELECT *   
FROM
    CTE
WHERE
    row_num = 1;

# Importing SQL- cleaned data into pandas

In [2]:
import pandas as pd
import getpass
import sqlalchemy as sa

In [3]:
driver = 'mysql+pymysql'
user = 'root'
password = getpass.getpass(prompt='Your password:'"")
ip = '127.0.0.1'

Your password:········


In [4]:
connection_string = f'{driver}://{user}:{password}@{ip}'

In [5]:
db_connection = sa.create_engine(connection_string)

In [6]:
insp = sa.inspect(db_connection)
db_list = insp.get_schema_names()
print(db_list)

['bank', 'cs_study', 'information_schema', 'lab_db', 'lab_db_python_sql', 'lab_groupby', 'mysql', 'olist', 'performance_schema', 'sakila', 'sys']


In [7]:
query = "SELECT * FROM cs_study.cs_study2"
cs_study2 = pd.read_sql_query(query, db_connection)
cs_study2.head()

Unnamed: 0,customer_id,state,customer_lifetime_value,response,coverage,education,effective_to_date,employment_status,gender,income,...,number_of_policies,policy_type,policy,renew_offer_type,sales_channel,total_claim_amount,vehicle_class,vehicle_size,vehicle_type,row_num
0,62,,2108,,Basic,Bachelor,1/2/11,Unemployed,M,0,...,1,Personal Auto,Personal L2,Offer1,Agent,351.85681,Two-Door Car,Medsize,,1
1,9926,,2143,,Basic,Bachelor,1/23/11,Unemployed,M,0,...,1,Corporate Auto,Corporate L1,Offer4,Web,686.774939,Two-Door Car,Medsize,,1
2,3980,,2150,,Basic,High School or Below,2/9/11,Unemployed,F,0,...,1,Personal Auto,Personal L3,Offer2,Web,292.8,Four-Door Car,Medsize,,1
3,1215,,2225,,Basic,College,1/22/11,Unemployed,M,0,...,1,Personal Auto,Personal L3,Offer2,Web,326.4,Four-Door Car,Medsize,,1
4,9670,,2227,,Basic,High School or Below,1/5/11,Retired,M,27972,...,1,Personal Auto,Personal L3,Offer3,Web,292.8,Four-Door Car,Medsize,,1
