In [1]:
# ******ignore warnings
import warnings
warnings.filterwarnings("ignore")


import numpy as np
import pandas as pd
from scipy import stats
import os


# ******files/data
from pydataset import data
import env
# import acquire
# import prepare


# ******visualizations
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns


# ******sklearn
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# 01. Data Acquisition
### Acquiring telco data from Codeup DB

In [None]:
# function to connect to Codeup DB using credentials from env.py file 

def connect_to_db(db, user = env.user, host = env.host, password = env.password):
    '''
    Build the connection URL for the given database.

    db: name of the database to connect to
    user, host, password: login credentials; they default to the values stored in the env.py file

    Returns a 'mysql+pymysql://...' URL string that can be passed to read_sql as the connection.
    Nothing is opened here; the function only formats the string.
    '''
    connection_url = f'mysql+pymysql://{user}:{password}@{host}/{db}'
    return connection_url

### Querying the database (using the connection URL built in the previous step) and reading the result into a DataFrame

In [None]:
# query joining customers to the contract, internet service and payment type tables
sql_query = '''select * from customers 
                join contract_types using (contract_type_id) 
                join internet_service_types using (internet_service_type_id) 
                join payment_types using (payment_type_id)'''

# run the query against telco_churn and keep the result as a DataFrame
raw_data = pd.read_sql(sql = sql_query, con = connect_to_db('telco_churn'))
raw_data.head(n = 2)

### Caching the DataFrame as a .csv file

In [None]:
# cache the freshly queried DataFrame locally so the database does not have to be queried again
raw_data.to_csv(path_or_buf = 'telco_data.csv')

# Functions for the acquire.py file

In [2]:
# these two functions will be inputs for the main acquire_telco_data() function

def get_connection(db, user = env.user, host = env.host, password = env.password):
    '''
    Return the connection URL for the database named by db.

    The user, host and password arguments default to the credentials stored in the env file.
    The returned 'mysql+pymysql://...' string is what read_sql needs in order to reach the database.

    First of the two helper functions used by the final acquire_telco_data function.
    '''
    url = f'mysql+pymysql://{user}:{password}@{host}/{db}'
    return url

    
def new_telco_data():
    '''
    Query the telco_churn database and return the result as a DataFrame.

    The query selects every column from customers joined to contract_types,
    internet_service_types and payment_types on their id columns. The connection
    URL is supplied by the get_connection function.

    Second of the two helper functions used by the final acquire_telco_data function.
    '''
#   the query text and the connection URL are handed straight to read_sql
    return pd.read_sql(
        '''select * from customers 
                    join contract_types using (contract_type_id) 
                    join internet_service_types using (internet_service_type_id) 
                    join payment_types using (payment_type_id)''',
        get_connection('telco_churn'),
    )
    


In [3]:
def acquire_telco_data(cache_path = 'telco_data.csv'):
    '''
    Return the telco data as a DataFrame, using a local csv file as a cache.

    cache_path: path of the csv cache file; defaults to 'telco_data.csv' in the
        current working directory.

    If cache_path exists, the csv is read into a DataFrame (its first column is used as the index).
        (else)
    If it does not exist, the input function new_telco_data is used to query the database,
    and the resulting DataFrame is written to cache_path so later calls can read the csv instead.
    '''
#   the existence check, the read and the write must all use the same path;
#   otherwise the cache is never found and the database is queried on every call
    if os.path.isfile(cache_path):
#       cache hit: read the csv file back into a DataFrame
        return pd.read_csv(cache_path, index_col = 0)

#   cache miss: query the database, then create the local csv file
    df = new_telco_data()
    df.to_csv(cache_path)

    return df


In [4]:
# running final acquire_telco_data function and then previewing

raw_data = acquire_telco_data()
# show the first two rows as a quick preview
raw_data.head(n = 2)

Unnamed: 0,payment_type_id,internet_service_type_id,contract_type_id,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,...,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,contract_type,internet_service_type,payment_type
0,2,1,3,0016-QLJIS,Female,0,Yes,Yes,65,Yes,...,Yes,Yes,Yes,Yes,90.45,5957.9,No,Two year,DSL,Mailed check
1,4,1,3,0017-DINOC,Male,0,No,No,54,No,...,Yes,Yes,No,No,45.2,2460.55,No,Two year,DSL,Credit card (automatic)
