<a href="https://colab.research.google.com/github/AndreSlavescu/Tensorflow-FreeCodeCamp/blob/main/Core-Learning-Algorithms/LinearRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup and Imports


In [1]:
!pip install -q sklearn

In [2]:
# Colab line magic requesting the TensorFlow 2.x runtime.
# NOTE(review): presumably not recognized by a plain Jupyter kernel — confirm
# before running this notebook outside Colab.
%tensorflow_version 2.x

In [12]:
from __future__ import division, absolute_import, print_function, unicode_literals

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

#Data

In [None]:
# Titanic CSVs: one file for training, one held out for evaluation
TRAIN_CSV_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
EVAL_CSV_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

dftrain = pd.read_csv(TRAIN_CSV_URL)  # training data
dfeval = pd.read_csv(EVAL_CSV_URL)  # testing data

# pop() removes the "survived" label column from each feature frame and
# returns it, so dftrain/dfeval keep only the model inputs.
y_train, y_eval = dftrain.pop("survived"), dfeval.pop("survived")

### `dftrain.head()` shows the first 5 rows of the DataFrame

In [None]:
# Preview the first five rows of the training features
dftrain.head(n=5)

### `dftrain.describe()` shows summary statistics of the data

In [None]:
# Summary statistics of the training features (displayed as the cell's output)
dftrain.describe()

### `dftrain.shape` gives the shape of the DataFrame

In [23]:
# Shape of the training feature frame, as (rows, columns)
dftrain.shape

(627, 9)

#Data Graphs

In [None]:
# Age distribution of the training passengers.
# `bins` must be an integer bin count: a string is interpreted as the name of a
# bin-selection strategy (e.g. "auto"), so passing "20" raises a ValueError.
dftrain.age.hist(bins=20)

In [None]:
# Number of male vs. female passengers, as a horizontal bar chart
dftrain["sex"].value_counts().plot.barh()

In [None]:
# Number of passengers per boarding class, as a horizontal bar chart
class_counts = dftrain["class"].value_counts()
class_counts.plot.barh()

In [None]:
# Mean of the "survived" label for each sex, as a horizontal bar chart.
# The labels were popped off dftrain earlier, so re-attach them column-wise first.
train_with_labels = pd.concat([dftrain, y_train], axis=1)
survival_by_sex = train_with_labels.groupby("sex")["survived"].mean()
ax = survival_by_sex.plot.barh()
ax.set_xlabel("% survive")

#Training vs. Testing

In [None]:
# Shape of the evaluation (testing) feature frame, as (rows, columns).
# This held-out set is what the model is evaluated against later in the notebook.
dfeval.shape

#Feature Columns

In [None]:
CATEGORICAL_COLUMNS = ["sex", "n_siblings_spouses", "parch", "class", "deck", "embark_town", "alone"]
NUMERIC_COLUMNS = ["age", "fare"]

# Categorical features: the vocabulary of each column is the set of distinct
# values observed for it in the training data.
categorical_features = [
  tf.feature_column.categorical_column_with_vocabulary_list(name, dftrain[name].unique())
  for name in CATEGORICAL_COLUMNS
]

# Numeric features are passed through as float32 values.
numeric_features = [
  tf.feature_column.numeric_column(name, dtype=tf.float32)
  for name in NUMERIC_COLUMNS
]

# Categorical columns first, then numeric ones.
feature_columns = categorical_features + numeric_features

#Input Function

In [42]:
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size = 32):
  """Build a zero-argument function that produces a batched tf.data.Dataset.

  Args:
    data_df: feature data; converted with dict() so each key becomes a named feature.
    label_df: labels paired element-wise with data_df.
    num_epochs: how many times the batched dataset is repeated.
    shuffle: when true, elements are shuffled with a 1000-element buffer before batching.
    batch_size: number of elements per batch.

  Returns:
    A callable taking no arguments that builds and returns the dataset.
  """
  def input_function():
    # Slice the (features, labels) pair into one dataset element per row.
    features_and_labels = (dict(data_df), label_df)
    dataset = tf.data.Dataset.from_tensor_slices(features_and_labels)
    if shuffle:
      dataset = dataset.shuffle(1000)
    # Batch first, then repeat the batched data once per epoch.
    batched = dataset.batch(batch_size)
    return batched.repeat(num_epochs)
  return input_function

In [43]:
# Training input: make_input_fn defaults (10 epochs, shuffled, batches of 32)
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation input: one unshuffled pass over the eval set
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, shuffle=False, num_epochs=1)

#Designing the Model

In [None]:
# Build a linear classifier estimator from the feature columns defined earlier.
# Note: LinearClassifier is a linear model for classification (the label here
# is the "survived" column), not a regression estimator.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

#create a linear estimator by passing feature_columns created earlier

In [None]:
# Fit the model on the training input pipeline
linear_est.train(input_fn=train_input_fn)
# Compute metrics on the held-out evaluation pipeline
result = linear_est.evaluate(input_fn=eval_input_fn)

In [None]:
# Clear the cell's existing output first so only the accuracy line remains
clear_output() #clear console output
# `result` is the metrics dictionary returned by linear_est.evaluate()
print(result["accuracy"]) #result is a dictionary of stats about the model

### Model Predictions

In [None]:
# Predict on the evaluation set. Each prediction is a dict; its "probabilities"
# entry is indexed at 1 below and reported as the probability of survival.
# Stored under its own name so the metrics dict in `result` is not overwritten.
pred_dicts = list(linear_est.predict(eval_input_fn))

# Show the first 5 evaluated passengers (fewer if there are fewer predictions)
for i in range(min(5, len(pred_dicts))):
  # Predictions come from dfeval, so show the matching eval row (not a training
  # row). Positional .iloc is used because pred_dicts is ordered by position.
  print(dfeval.iloc[i])
  output = pred_dicts[i]["probabilities"][1]
  # The "Survived" / "Did not Survive" prefix is the actual label from y_eval,
  # printed next to the model's predicted probability for comparison.
  if y_eval.iloc[i] == 1:
    print(f"Survived -> Probability of Survival: {output}")
  else:
    print(f"Did not Survive -> Probability of Survival: {output}")
    