In [1]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score


In [2]:
#Import Libraries
import pandas as pd
import numpy as np
import os
import acquire

In [3]:
# Acquire

# Load the Telco churn data by calling get_telco_churn_data from the imported acquire module
telco = acquire.get_telco_churn_data()

   

In [5]:
# Create helper function to get the necessary connection url
def get_connection(db_name):
    """
    Build the 'mysql+pymysql' connection URL for the given database.

    The URL is assembled from the module-level names `user`, `password`
    and `host` plus `db_name`. The user name and password are
    percent-encoded so that characters with a special meaning inside a URL
    (such as '@', ':' or '/') cannot corrupt the result.

    NOTE(review): `user`, `password` and `host` are not defined in the
    visible cells -- presumably they come from the env file mentioned in the
    original docstring (e.g. `from env import user, password, host`);
    confirm that import runs before this function is called.

    Parameters
    ----------
    db_name : str
        Name of the database to place at the end of the URL.

    Returns
    -------
    str
        URL of the form 'mysql+pymysql://<user>:<password>@<host>/<db_name>'.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote

    # safe='' also encodes '/', which quote() would otherwise leave untouched.
    safe_user = quote(str(user), safe='')
    safe_password = quote(str(password), safe='')

    return f'mysql+pymysql://{safe_user}:{safe_password}@{host}/{db_name}'


In [6]:
# Create function to retrieve telco_churn data
def get_telco_churn_data():
    """
    Return the Telco Churn data as a pandas DataFrame.

    If a file named 'telco.csv' exists in the working directory it is read
    and returned directly. Otherwise the data is queried from the
    'telco_churn' database (customers joined to payment, contract and
    internet service types), written to 'telco.csv' without the index,
    and then returned.
    """
    cache_path = 'telco.csv'

    # Cached copy present: read it and skip the database entirely.
    if os.path.isfile(cache_path):
        return pd.read_csv(cache_path)

    query = '''
                SELECT *
                FROM customers
                JOIN payment_types USING (payment_type_id)
                JOIN contract_types USING (contract_type_id)
                JOIN internet_service_types USING (internet_service_type_id);
                '''

    telco_df = pd.read_sql(query, get_connection('telco_churn'))

    # Save the result so the next call is served from the CSV.
    telco_df.to_csv(cache_path, index=False)

    return telco_df



## Prepare


In [None]:
def prep_telco_churn(df):
    '''
    Clean the DataFrame from get_telco_churn_data and return the cleaned copy.

    Steps applied:
    - drop every column whose name repeats an earlier column, then drop
      fully duplicated rows (the first occurrence is kept);
    - replace ' ' in 'total_charges' with '0' and cast the column to float;
    - encode 'churn', 'partner', 'dependents' and 'phone_service' from
      'No'/'Yes' to 0/1. Any other value in those columns becomes NaN, so
      this function should be applied once, to the un-encoded data.

    All work happens on an independent copy: the input DataFrame is left
    unmodified and no SettingWithCopyWarning is triggered.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'total_charges', 'churn', 'partner',
        'dependents' and 'phone_service'.

    Returns
    -------
    pandas.DataFrame
        The preprocessed copy of `df`.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If 'total_charges' holds a value that cannot be converted to float.
    '''
    # Chain the filters without inplace=True and finish with .copy() so the
    # column assignments below target a standalone frame, not a slice of `df`.
    prepped = df.loc[:, ~df.columns.duplicated()].drop_duplicates().copy()

    # Replace ' ' in 'total_charges' with '0' before the float conversion
    prepped['total_charges'] = prepped['total_charges'].replace(' ', '0').astype(float)

    # Encode the Yes/No columns as 1/0
    yes_no_codes = {'No': 0, 'Yes': 1}
    for col in ['churn', 'partner', 'dependents', 'phone_service']:
        prepped[col] = prepped[col].map(yes_no_codes)

    return prepped

In [None]:
def split_data(df):
    '''
    Split a DataFrame into train, validate and test DataFrames.

    Two stratified splits on the 'churn' column are made with a fixed
    random_state of 123: 20% of `df` is held out as test, then 30% of the
    remainder is held out as validate. The rest is train.

    Returns the tuple (train, validate, test).
    '''
    seed = 123

    # First cut: hold out the test set.
    remainder, test = train_test_split(
        df,
        test_size=.2,
        random_state=seed,
        stratify=df.churn,
    )

    # Second cut: carve validate out of what is left.
    train, validate = train_test_split(
        remainder,
        test_size=.3,
        random_state=seed,
        stratify=remainder.churn,
    )

    return train, validate, test


In [None]:
# To inspect the first few rows of the DataFrame, use the head method.
telco.head()

    
    


## Explore the data


#### Probe the data for insights
