# Objectives: 
## - Develop a predictive (regression) model that predicts prices of (future) iPhone releases
## - Train the model on iPhone price data from the first release (2007) to the present day
## - Deploy the predictive model as a front-end web application using Dash (Python) and/or React (JavaScript)


In [210]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import os 
import seaborn as sns 
%matplotlib inline 

# Location of the raw iPhone release data, relative to the working directory
data_path = "./iphone_releases.csv"

# Load the CSV into the DataFrame that the following cells clean up
iphones = pd.read_csv(filepath_or_buffer=data_path)

def saveOutput(df, filename):
    """Write ``df`` to ``filename`` as CSV using ``to_csv``'s default settings.

    No ``index`` argument is passed, so the row index is written as the
    first column of the file.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        filename: Destination handed straight to ``to_csv``.

    Returns:
        None.
    """
    df.to_csv(path_or_buf=filename)

### Stage 1: Pre-Processing
#### - Clean, format, and repurpose iPhone price data for the predictive model

In [211]:
# NORMALISE TEXT TO UPPERCASE
# upper-case the values of every object-dtype (text) column; any other column is returned unchanged
iphones = iphones.apply(
    lambda series: series.str.upper() if series.dtype == 'O' else series
)
# upper-case the column labels as well
iphones.columns = iphones.columns.str.upper()

# DERIVE "YEAR" FROM "DATE"
# keep only the first four characters of each date string
# (assumes the year comes first in the raw "DATE" text — confirm against the CSV)
iphones["DATE"] = iphones["DATE"].str.slice(stop=4)
# the column now holds just the year, so relabel it to match
iphones = iphones.rename(columns={"DATE": "YEAR"})

# FILL MISSING VALUES
# rows with no "SCALE" are iPhones without a special size: label them "STANDARD"
iphones.loc[iphones["SCALE"].isna(), "SCALE"] = "STANDARD"

# rows with no "EDITION" are iPhones that are not a special edition: mark them with 0
iphones.loc[iphones["EDITION"].isna(), "EDITION"] = 0

# give the 2007 rows (the original iPhone) "VERSION" 1
# NOTE(review): 0 and 1 are written as integers into columns that otherwise hold text
# (e.g. "3G", "PRO"), so those columns presumably end up mixed int/str — confirm this is intended
iphones.loc[iphones["YEAR"] == "2007", "VERSION"] = 1

In [212]:
# Write the pre-processed DataFrame to "iphones_output.csv" via the saveOutput helper
saveOutput(iphones, "iphones_output.csv")

# Bare expression as the last statement of the cell: the notebook renders the DataFrame as the cell output
iphones

Unnamed: 0,YEAR,VERSION,EDITION,SCALE,MEMORY,PRICE
0,2007,1,0,STANDARD,4,499.0
1,2007,1,0,STANDARD,8,599.0
2,2008,3G,0,STANDARD,8,199.0
3,2008,3G,0,STANDARD,16,299.0
4,2009,3G,S,STANDARD,16,199.0
...,...,...,...,...,...,...
109,2023,15,PRO,STANDARD,512,1299.0
110,2023,15,PRO,STANDARD,1024,1499.0
111,2023,15,PRO,MAX,256,1199.0
112,2023,15,PRO,MAX,512,1399.0
