<a href="https://colab.research.google.com/github/MuthomiTed/titanic/blob/master/titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import CategoricalImputer: inputs and outputs numpy array
from sklearn_pandas import CategoricalImputer

# Import pandas
import pandas as pd

# categorical_imputer
class Categorical_Imputer:
    """
    Column-by-column imputer for the categorical columns of a dataframe.

    Every column is run through ``CategoricalImputer.fit_transform`` using the
    imputer's default settings (the most frequent value, per this class's
    original description).
    """

    def __init__(self, strategy):
        """
        Remember the requested strategy.

        Note: ``strategy`` is only stored on the instance; ``fit_transform``
        does not read it.
        """
        self.strategy = strategy

    def fit_transform(self, df:'dataframe')->'dataframe':
        """
        Return the result of imputing each column of ``df``.

        Parameters
        ----------
        df : dataframe
            Frame whose columns should have their missing values filled.

        Returns
        -------
        dataframe
            Whatever ``df.apply`` assembles from the per-column outputs of
            ``CategoricalImputer.fit_transform``.
        """
        # A single imputer object is reused; it is re-fitted on every column.
        column_imputer = CategoricalImputer()

        def _impute_column(column):
            # Fit on this column and return its filled values.
            return column_imputer.fit_transform(column)

        # axis=0 hands the columns to the helper one at a time.
        return df.apply(_impute_column, axis=0)

In [0]:
# Function to calculate missing values by column
def missing_values_table(df):
    """
    Summarise the missing values of a dataframe, column by column.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` that contains at least one missing
        value, indexed by column name, with two columns:

        * ``'Missing Values'``    - number of null entries in the column
        * ``'% of Total Values'`` - that number as a percentage of the row
          count, rounded to 2 decimals

        Rows are sorted by percentage, highest first. A frame without rows
        (or without missing values) yields an empty table.

    Side effects
    ------------
    Prints a two-line summary: the number of columns in ``df`` and how many
    of them have missing values.
    """
    # Count nulls per column once and reuse the result for both columns.
    missing_counts = df.isnull().sum()

    # Guard the denominator: for a frame with zero rows the percentage would
    # otherwise be 0/0 = NaN.
    row_count = max(len(df), 1)
    missing_percent = 100 * missing_counts / row_count

    # Name the columns directly instead of renaming the default 0/1 labels.
    summary = pd.concat(
        [missing_counts, missing_percent],
        axis=1,
        keys=['Missing Values', '% of Total Values'],
    )

    # Keep only columns that really have missing entries. Filtering on the
    # count (not on "percentage != 0") avoids treating NaN percentages as
    # "has missing values", since NaN != 0 evaluates to True.
    summary = (
        summary[summary['Missing Values'] > 0]
        .sort_values('% of Total Values', ascending=False)
        .round(2)  # keep 2 decimal digits
    )

    # Print some summary information
    print("Your selected dataframe has {} columns.".format(df.shape[1]) + '\n' +
          "There are {} columns that have missing values.".format(summary.shape[0]))

    # Return the dataframe with missing information
    return summary


In [0]:
# Pandas
import pandas as pd

# Numpy
import numpy as np

# matplotlib
import matplotlib.pyplot as plt

# Ipython tool for figsize
from IPython.core.pylabtools import figsize

# train test split
from sklearn.model_selection import train_test_split

# Pipeline
from sklearn.pipeline import Pipeline

# DictVectorizer
from sklearn.feature_extraction import DictVectorizer

# XGB
import xgboost as xgb

# cross_val_score
from sklearn.model_selection import cross_val_score

# DataframeMapper, CategoricalImputer
from sklearn_pandas import DataFrameMapper, CategoricalImputer

# Imputer
from sklearn.preprocessing import Imputer

# FeatureUnion
from sklearn.pipeline import FeatureUnion

# Function Transformer
from sklearn.preprocessing import FunctionTransformer

# check missing value percentage *(Self Defined Package)
from missing_value.missing_values_table import missing_values_table

# fill categorical missing values *(Self Defined Package)
from missing_value.fill_missing_values import Categorical_Imputer

# Plot AUC *(Self Defined Package)
from metrics.roc_auc import roc_auc

# ROC AUC
from sklearn.metrics import roc_auc_score, roc_curve

ModuleNotFoundError: ignored

In [0]:
# matplotlib
import matplotlib.pyplot as plt

# roc_auc_score, roc_curve
from sklearn.metrics import roc_auc_score, roc_curve

class roc_auc:
    """
    Pair true labels with predicted scores to report the AUC and draw the
    ROC curve for a named model.
    """

    def __init__(self, y_test, y_pred_prob, model='model'):
        """
        Store the evaluation inputs.

        y_test      : true labels, passed as the first argument to the metrics
        y_pred_prob : predicted scores, passed as the second argument
        model       : display name used in the plot legend and title
        """
        self.y_test, self.y_pred_prob = y_test, y_pred_prob
        self.model = model

    def auc(self):
        """
        Return ``roc_auc_score`` for the stored labels and scores.
        """
        return roc_auc_score(self.y_test, self.y_pred_prob)

    def plot_roc(self):
        """
        Draw the ROC curve with pyplot and show it.

        The legend entry carries the model name and the AUC formatted with
        two decimals; a dashed line from (0, 0) to (1, 1) is drawn as a
        reference.
        """
        # Score shown in the legend.
        score = self.auc()

        # Points of the curve; the thresholds are not used for plotting.
        false_pos_rate, true_pos_rate, _thresholds = roc_curve(self.y_test, self.y_pred_prob)

        # Only the literal is formatted, then prefixed with the model name.
        curve_label = self.model + ' Classifier (AUC = {: .2f})'.format(score)

        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(false_pos_rate, true_pos_rate, label=curve_label)
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.title(self.model + ' Classifier ROC Curve')
        plt.show()

In [0]:
from sklearn import metrics

from metrics.roc_auc import roc_auc

ModuleNotFoundError: ignored

In [0]:
# NOTE(review): accuracy_score is called here without any arguments, and the
# recorded output of this cell is a TypeError. It needs the true labels and the
# predicted labels to be passed in before this cell can run.
metrics.accuracy_score()

TypeError: ignored

In [0]:
# Python code to illustrate the Modules 
class Bmw:
    """Keeps a list of BMW model names and can print them."""

    def __init__(self):
        # Model names reported by outModels.
        self.models = ['i8', 'x1', 'x5', 'x6']

    def outModels(self):
        """Print a header line, then one tab-indented line per model."""
        header = 'These are the available models for BMW'
        print(header)
        for name in self.models:
            print('\t%s ' % name)

In [0]:
# Python code to illustrate the Module 
class Audi:
    """Keeps a list of Audi model names and can print them."""

    def __init__(self):
        # Model names reported by outModels.
        self.models = ['q7', 'a6', 'a8', 'a3']

    def outModels(self):
        """Print a header line, then one tab-indented line per model."""
        header = 'These are the available models for Audi'
        print(header)
        for name in self.models:
            print('\t%s ' % name)

In [0]:
# Python code to illustrate the Module 
class Nissan:
    """Keeps a list of Nissan model names and can print them."""

    def __init__(self):
        # Model names reported by outModels.
        self.models = ['altima', '370z', 'cube', 'rogue']

    def outModels(self):
        """Print a header line, then one tab-indented line per model."""
        header = 'These are the available models for Nissan'
        print(header)
        for name in self.models:
            print('\t%s ' % name)

In [0]:
from Bmw import Bmw 
from Audi import Audi 
from Nissan import Nissan 

ModuleNotFoundError: ignored

In [0]:
# Import classes from your brand new package 
from Cars import Bmw 
from Cars import Audi 
from Cars import Nissan 
   
# NOTE(review): the recorded output of this cell is a ModuleNotFoundError, so
# presumably the `from Cars import ...` lines failed and nothing below ran;
# confirm the Cars package exists on the import path.
# Create a Bmw instance and call outModels() on it; the instance stays
# available afterwards as the notebook-level name ModBMW.
ModBMW = Bmw() 
ModBMW.outModels() 
   
# Same for Audi: the instance is kept as ModAudi.
ModAudi = Audi() 
ModAudi.outModels() 
  
# Same for Nissan: the instance is kept as ModNissan.
ModNissan = Nissan() 
ModNissan.outModels() 

ModuleNotFoundError: ignored