# Objectives: 
## - Develop a predictive (regression) model that predicts prices of (future) iPhone releases
## - Train the model on iPhone price data from the first release (2007) to the present day
## - Deploy predictive model into the front-end as a web application using Dash (Python) and/or React (JavaScript) 


In [428]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import os 
import seaborn as sns 
%matplotlib inline 

# Location of the raw iPhone release data, relative to the working directory.
data_path = "./iphone_releases.csv"

# Load the raw CSV into the DataFrame that the cells below clean in place.
iphones = pd.read_csv(data_path)

def saveOutput(df, filename):
    """Write ``df`` to ``filename`` as a CSV file.

    The frame is written with pandas' default ``to_csv`` settings, so the
    row index is emitted as the first (unnamed) column.

    Args:
        df: The DataFrame to persist.
        filename: Destination path handed straight to ``DataFrame.to_csv``.
    """
    df.to_csv(path_or_buf=filename)

### Stage 1: Pre-Processing
#### - Clean, format, and repurpose iPhone price data for the predictive model

In [429]:
# CONVERT ALL TEXT TO UPPERCASE
# upper-case the values of every object-dtype column; other columns pass through unchanged
iphones = iphones.apply(
    lambda column: column.str.upper() if column.dtype == 'O' else column
)
# upper-case the column labels as well
upper_labels = iphones.columns.str.upper()
iphones.columns = upper_labels

# REFORMAT "DATE" COLUMN
# keep only the first four characters (the year); month and day are dropped
iphones["DATE"] = iphones["DATE"].str.slice(stop=4)

# RENAME COLUMNS
# "DATE" becomes "YEAR" and "MEMORY" becomes "GB"
iphones = iphones.rename(columns={"DATE": "YEAR", "MEMORY": "GB"})

# FILL MISSING VALUES
# rows released in 2007 (the original iPhone) get "VERSION" 1
released_2007 = iphones["YEAR"] == "2007"
iphones.loc[released_2007, "VERSION"] = 1

# labels used for the two "EDITION" categories and the two "SCALE" categories
edition_regular = "STANDARD"
edition_special = "SPECIAL"
size_regular = "MEDIUM"
size_large = "LARGE"

# a missing "EDITION" means the iPhone is not a special edition -> "STANDARD"
edition_missing = iphones["EDITION"].isna()
iphones.loc[edition_missing, "EDITION"] = edition_regular

# a missing "SCALE" means the iPhone is not a special size -> "MEDIUM"
scale_missing = iphones["SCALE"].isna()
iphones.loc[scale_missing, "SCALE"] = size_regular

# MERGE SIMILAR (REDUNDANT) VALUE TYPES
# "S" and "PRO" editions are both treated as "SPECIAL"
is_special_edition = iphones["EDITION"].isin(["S", "PRO"])
iphones.loc[is_special_edition, "EDITION"] = edition_special

# "PLUS" and "MAX" scales are both treated as "LARGE"
is_large_scale = iphones["SCALE"].isin(["PLUS", "MAX"])
iphones.loc[is_large_scale, "SCALE"] = size_large

# REMOVE EXCEPTIONAL DATA POINTS
# keep only rows whose "EDITION" is "STANDARD" or "SPECIAL" (drops e.g. "R" or "C")
has_known_edition = iphones["EDITION"].isin([edition_regular, edition_special])
iphones = iphones.loc[has_known_edition]

# keep only rows whose "SCALE" is "MEDIUM" or "LARGE" (drops e.g. "MINI")
has_known_scale = iphones["SCALE"].isin([size_regular, size_large])
iphones = iphones.loc[has_known_scale]

In [430]:
# Persist the cleaned DataFrame to CSV via the saveOutput helper.
saveOutput(iphones, "iphones_output.csv")

# Bare expression as the last line of the cell so the notebook renders the frame.
iphones

Unnamed: 0,YEAR,VERSION,EDITION,SCALE,GB,PRICE
0,2007,1,STANDARD,MEDIUM,4,499.0
1,2007,1,STANDARD,MEDIUM,8,599.0
2,2008,3G,STANDARD,MEDIUM,8,199.0
3,2008,3G,STANDARD,MEDIUM,16,299.0
4,2009,3G,SPECIAL,MEDIUM,16,199.0
...,...,...,...,...,...,...
109,2023,15,SPECIAL,MEDIUM,512,1299.0
110,2023,15,SPECIAL,MEDIUM,1024,1499.0
111,2023,15,SPECIAL,LARGE,256,1199.0
112,2023,15,SPECIAL,LARGE,512,1399.0
