In [1]:
import pandas as pd
import numpy as np

import pylab as plt
import seaborn as sns

import re

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split as tts

from sklearn.linear_model import LinearRegression as LinReg   # alias es mio

from sklearn.linear_model import Lasso        # regularizacion L1
from sklearn.linear_model import Ridge        # regularizacion L2
from sklearn.linear_model import ElasticNet   # regularizacion L1+L2

from sklearn.tree import ExtraTreeRegressor as ETR
from sklearn.ensemble import RandomForestRegressor as RFR  
from sklearn.ensemble import GradientBoostingRegressor as GBR
from sklearn.ensemble import GradientBoostingRegressor as GBR

from sklearn.metrics import mean_squared_error as mse

In [2]:
# Load the training set, the test set and the sample file from the
# working directory.
train, test, sample = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'muestra.csv')
)

In [3]:
# Stack train on top of test so every cleaning step runs once on both.
# ignore_index=True gives the combined frame a unique 0..n-1 index; without
# it the test rows reuse the train row labels, and any label-based write
# (df.loc[label, ...]) would touch a train row and a test row at once.
alldata = pd.concat([train, test], axis=0, ignore_index=True)

In [4]:
def column_touchscreen (df):
    """Add a binary ``Touchscreen`` column to ``df`` in place.

    The flag is 1 when the ``Screen`` text contains the substring
    ``'Touch'`` and 0 otherwise; a missing ``Screen`` value counts as 0.

    Args:
        df: DataFrame with a string ``Screen`` column. Modified in place.

    Returns:
        None.
    """
    # Literal substring test (no regex); na=False maps missing values to False.
    has_touch = df['Screen'].str.contains('Touch', regex=False, na=False)
    df['Touchscreen'] = has_touch.astype(int)

# Add the Touchscreen column to the combined frame (mutates alldata in place).
column_touchscreen (alldata)

In [5]:
def column_screen(df):
    """Replace ``Screen`` with its ``<width>x<height>`` resolution, in place.

    The first run of digits, an ``x`` and another run of digits found in each
    value is kept; rows without such a pattern become NaN.

    Args:
        df: DataFrame with a string ``Screen`` column. Modified in place.
    """
    resolution = df['Screen'].str.extract(r'(\d+x\d+)')
    df['Screen'] = resolution

# Reduce the Screen text to its resolution on the combined frame (in place).
column_screen (alldata)

In [6]:
def column_screensize (df):
    """Convert ``Screen Size`` from text such as ``15.6"`` to whole inches.

    The double-quote character is stripped, the text is parsed as a float and
    then cast to int, which drops the fractional part (15.6 becomes 15).

    Args:
        df: DataFrame with a string ``Screen Size`` column. Modified in place.

    Returns:
        None.
    """
    # regex=False: the quote is a literal character, not a pattern.
    inches = df['Screen Size'].str.replace('"', '', regex=False)
    df['Screen Size'] = inches.astype(float).astype(int)

# Convert Screen Size to integer inches on the combined frame (in place).
column_screensize (alldata)

In [7]:
# Lookup table keyed by the exact CPU description string.
# Each value is a 2-tuple: element 0 feeds the 'Kernels' column and element 1
# feeds the 'GHz' column in column_cpu (presumably core count and base clock).
# Every key appears exactly once: a repeated key in a dict literal is silently
# overwritten by the later occurrence.
specs = {
    'Intel Core i5 2.3GHz': (2, 2.3),
    'Intel Core i5 1.8GHz': (2, 1.8),
    'Intel Core i5 7200U 2.5GHz': (2, 2.5),
    'Intel Core i7 2.7GHz': (4, 2.7),
    'Intel Core i5 3.1GHz': (4, 3.1),
    'AMD A9-Series 9420 3GHz': (2, 3.0),
    'Intel Core i7 2.2GHz': (4, 2.2),
    'Intel Core i7 8550U 1.8GHz': (4, 1.8),
    'Intel Core i5 8250U 1.6GHz': (4, 1.6),
    'Intel Core i3 6006U 2GHz': (2, 2.0),
    'Intel Core i7 2.8GHz': (4, 2.8),
    'Intel Core M m3 1.2GHz': (2, 1.2),
    'Intel Core i7 7500U 2.7GHz': (2, 2.7),
    'Intel Core i7 2.9GHz': (4, 2.9),
    'Intel Core i3 7100U 2.4GHz': (2, 2.4),
    'Intel Atom x5-Z8350 1.44GHz': (4, 1.44),
    'Intel Core i5 7300HQ 2.5GHz': (4, 2.5),
    'AMD E-Series E2-9000e 1.5GHz': (2, 1.5),
    'Intel Core i5 1.6GHz': (4, 1.6),
    'Intel Core i7 8650U 1.9GHz': (4, 1.9),
    'Intel Atom x5-Z8300 1.44GHz': (4, 1.44),
    'AMD E-Series E2-6110 1.5GHz': (4, 1.5),
    'AMD A6-Series 9220 2.5GHz': (2, 2.5),
    'Intel Celeron Dual Core N3350 1.1GHz': (2, 1.1),
    'Intel Core i3 7130U 2.7GHz': (2, 2.7),
    'Intel Core i7 7700HQ 2.8GHz': (4, 2.8),
    'Intel Core i5 2.0GHz': (4, 2.0),
    'AMD Ryzen 1700 3GHz': (8, 3.0),
    'AMD FX 9830P 3GHz': (4, 3.0),
    'Intel Core i7 7560U 2.4GHz': (2, 2.4),
    'AMD E-Series 6110 1.5GHz': (4, 1.5),
    'Intel Core i5 6200U 2.3GHz': (2, 2.3),
    'Intel Core M 6Y75 1.2GHz': (2, 1.2),
    'Intel Core i5 7500U 2.7GHz': (2, 2.7),
    'Intel Core i3 6006U 2.2GHz': (2, 2.2),
    'AMD A6-Series 9220 2.9GHz': (2, 2.9),
    'Intel Core i7 6920HQ 2.9GHz': (4, 2.9),
    'Intel Core i5 7Y54 1.2GHz': (2, 1.2),
    'Intel Core i7 7820HK 2.9GHz': (4, 2.9),
    'Intel Xeon E3-1505M V6 3GHz': (4, 3.0),
    'Intel Core i7 6500U 2.5GHz': (2, 2.5),
    'AMD E-Series 9000e 1.5GHz': (2, 1.5),
    'AMD A10-Series A10-9620P 2.5GHz': (4, 2.5),
    'AMD A6-Series A6-9220 2.5GHz': (2, 2.5),
    'Intel Core i5 2.9GHz': (4, 2.9),
    'Intel Core i7 6600U 2.6GHz': (2, 2.6),
    'Intel Core i3 6006U 2.0GHz': (2, 2.0),
    'Intel Celeron Dual Core 3205U 1.5GHz': (2, 1.5),
    'Intel Core i7 7820HQ 2.9GHz': (4, 2.9),
    'AMD A10-Series 9600P 2.4GHz': (4, 2.4),
    'Intel Core i7 7600U 2.8GHz': (2, 2.8),
    'AMD A8-Series 7410 2.2GHz': (4, 2.2),
    'Intel Celeron Dual Core 3855U 1.6GHz': (2, 1.6),
    'Intel Pentium Quad Core N3710 1.6GHz': (4, 1.6),
    'AMD A12-Series 9720P 2.7GHz': (4, 2.7),
    'Intel Core i5 7300U 2.6GHz': (2, 2.6),
    'AMD A12-Series 9720P 3.6GHz': (4, 3.6),
    'Intel Celeron Quad Core N3450 1.1GHz': (4, 1.1),
    'Intel Celeron Dual Core N3060 1.60GHz': (2, 1.6),
    'Intel Core i5 6440HQ 2.6GHz': (4, 2.6),
    'Intel Core i7 6820HQ 2.7GHz': (4, 2.7),
    'AMD Ryzen 1600 3.2GHz': (6, 3.2),
    'Intel Core i7 7Y75 1.3GHz': (2, 1.3),
    'Intel Core i5 7440HQ 2.8GHz': (4, 2.8),
    'Intel Core i7 7660U 2.5GHz': (2, 2.5),
    'Intel Core i7 7700HQ 2.7GHz': (4, 2.7),
    'Intel Core M m3-7Y30 2.2GHz': (2, 2.2),
    'Intel Core i5 7Y57 1.2GHz': (2, 1.2),
    'Intel Core i7 6700HQ 2.6GHz': (4, 2.6),
    'Intel Core i3 6100U 2.3GHz': (2, 2.3),
    'AMD A10-Series 9620P 2.5GHz': (4, 2.5),
    'AMD E-Series 7110 1.8GHz': (4, 1.8),
    'Intel Celeron Dual Core N3350 2.0GHz': (2, 2.0),
    'AMD A9-Series A9-9420 3GHz': (2, 3.0),
    'Intel Core i7 6820HK 2.7GHz': (4, 2.7),
    'Intel Core M 7Y30 1.0GHz': (2, 1.0),
    'Intel Xeon E3-1535M v6 3.1GHz': (4, 3.1),
    'Intel Celeron Quad Core N3160 1.6GHz': (4, 1.6),
    'Intel Core i5 6300U 2.4GHz': (2, 2.4),
    'Intel Core i3 6100U 2.1GHz': (2, 2.1),
    'AMD E-Series E2-9000 2.2GHz': (2, 2.2),
    'Intel Celeron Dual Core N3050 1.6GHz': (2, 1.6),
    'Intel Core M M3-6Y30 0.9GHz': (2, 0.9),
    'AMD A9-Series 9420 2.9GHz': (2, 2.9),
    'Intel Core i5 6300HQ 2.3GHz': (4, 2.3),
    'AMD A6-Series 7310 2GHz': (4, 2.0),
    'Intel Atom Z8350 1.92GHz': (4, 1.92),
    'Intel Xeon E3-1535M v5 2.9GHz': (4, 2.9),
    'Intel Core i5 6260U 1.8GHz': (2, 1.8),
    'Intel Pentium Dual Core N4200 1.1GHz': (4, 1.1),
    'Intel Celeron Quad Core N3710 1.6GHz': (4, 1.6),
    'Intel Core M 1.2GHz': (2, 1.2),
    'AMD A12-Series 9700P 2.5GHz': (4, 2.5),
    'Intel Core i7 7500U 2.5GHz': (2, 2.5),
    'Intel Pentium Dual Core 4405U 2.1GHz': (2, 2.1),
    'AMD A4-Series 7210 2.2GHz': (4, 2.2),
    'Intel Core i7 6560U 2.2GHz': (2, 2.2),
    'Intel Core M m7-6Y75 1.2GHz': (2, 1.2),
    'AMD FX 8800P 2.1GHz': (4, 2.1),
    'Intel Core M M7-6Y75 1.2GHz': (2, 1.2),
    'Intel Core i5 7200U 2.50GHz': (2, 2.5),
    'Intel Core i5 7200U 2.70GHz': (2, 2.7),
    'Intel Atom X5-Z8350 1.44GHz': (4, 1.44),
    'Intel Core M 1.1GHz': (2, 1.1),
    'Intel Pentium Dual Core 4405Y 1.5GHz': (2, 1.5),
    'Intel Pentium Quad Core N3700 1.6GHz': (4, 1.6),
    'Intel Core M 6Y54 1.1GHz': (2, 1.1),
    'Intel Core i7 6500U 2.50GHz': (2, 2.5),
    'Intel Celeron Dual Core N3350 2GHz': (2, 2.0),
    'Samsung Cortex A72&A53 2.0GHz': (2, 2.0),
    'AMD E-Series 9000 2.2GHz': (2, 2.2),
    'Intel Core M 6Y30 0.9GHz': (2, 0.9),
    'AMD A9-Series 9410 2.9GHz': (2, 2.9),
    'Intel Pentium Quad Core N4200 1.1GHz': (4, 1.1),
    'Intel Atom x5-Z8550 1.44GHz': (4, 1.44),
    'Intel Celeron Dual Core N3060 1.6GHz': (2, 1.6),
    'Intel Core i5 1.3GHz': (2, 1.3),
    'Intel Core i5 7200U 2.7GHz': (2, 2.7),
}

In [8]:
def column_cpu (df, cpu_specs=None):
    """Derive ``Kernels`` and ``GHz`` columns from ``CPU``, in place.

    Each ``CPU`` string is looked up in a table of 2-tuples: element 0 becomes
    ``Kernels`` (int) and element 1 becomes ``GHz`` (float). The ``CPU``
    column itself is left untouched; the caller is responsible for dropping
    it.

    Args:
        df: DataFrame with a ``CPU`` column. Modified in place.
        cpu_specs: Optional mapping from CPU string to a 2-tuple. Defaults to
            the notebook-level ``specs`` table.

    Returns:
        None.

    Raises:
        KeyError: If a ``CPU`` value is missing from the lookup table.
    """
    if cpu_specs is None:
        cpu_specs = specs

    # Explicit indexing (rather than Series.map(dict)) so an unknown CPU fails
    # loudly with KeyError instead of silently turning into NaN.
    df['Kernels'] = df['CPU'].map(lambda cpu: cpu_specs[cpu][0]).astype(int)
    df['GHz'] = df['CPU'].map(lambda cpu: cpu_specs[cpu][1]).astype(float)

# Derive Kernels / GHz on the combined frame, then discard the raw CPU text
# and make sure Kernels is stored as an integer column.
column_cpu(alldata)
alldata = alldata.drop(columns=['CPU'])
alldata = alldata.astype({'Kernels': int})

In [9]:
def column_ram (df):
    """Convert ``RAM`` from text such as ``8GB`` to an integer, in place.

    Every non-digit character is removed and the remainder is cast to int.
    The column keeps the name ``RAM``.

    Args:
        df: DataFrame with a string ``RAM`` column. Modified in place.

    Returns:
        None.
    """
    digits_only = df['RAM'].str.replace(r'\D', '', regex=True)
    df['RAM'] = digits_only.astype(int)

# Turn RAM into a number on the combined frame and make sure it is stored as
# an integer column.
column_ram(alldata)
alldata = alldata.astype({'RAM': int})

In [10]:
# Rename the ' Storage' column (note the leading space) to 'Storage'.
alldata = alldata.rename(columns={' Storage': 'Storage'})
pattern_storage = r'(\d+)(TB|GB)'

# Tolerant variant of pattern_storage used by convert_storage: allows leading
# whitespace, a decimal size ("1.0TB") and a space between number and unit.
_storage_size_re = re.compile(r'\s*(\d+(?:\.\d+)?)\s*(TB|GB)')


def convert_storage(storage):
    """Return the leading drive size of a storage description in GB.

    Only the size at the start of the text is read, so for a value listing
    several drives just the first one counts. Terabytes are converted with
    a factor of 1024.

    Args:
        storage: Text such as ``'256GB SSD'`` or ``'1.0TB Hybrid'``.

    Returns:
        The size in GB as an int, or None when ``storage`` is not a string
        (e.g. NaN) or does not start with ``<number>GB`` / ``<number>TB``.
    """
    if not isinstance(storage, str):
        return None

    match = _storage_size_re.match(storage)
    if match is None:
        return None

    size = float(match.group(1))
    if match.group(2) == 'TB':
        size *= 1024
    return int(round(size))

# Replace each Storage description with the value returned by convert_storage.
alldata['Storage'] = alldata['Storage'].apply(convert_storage)

In [11]:
def column_opeartingsystem (df):
    """Normalise ``Operating System`` labels in place.

    ``'macOS'`` is relabelled ``'Mac OS'`` and ``'Android'`` is relabelled
    ``'Windows'``; every other value is left as is.

    The replacement is value-based, so it is correct for any row index
    (including duplicated or non-sequential labels) and never adds rows.

    Args:
        df: DataFrame with an ``Operating System`` column. Modified in place.

    Returns:
        None.
    """
    relabel = {'macOS': 'Mac OS', 'Android': 'Windows'}
    df['Operating System'] = df['Operating System'].replace(relabel)

    
# Discard the OS version column, then normalise the OS labels in place.
alldata = alldata.drop(columns=['Operating System Version'])
column_opeartingsystem(alldata)

In [12]:
def column_weight (df):
    """Convert ``Weight`` from text such as ``1.37kg`` to a float, in place.

    Everything except digits and the decimal point is removed before the
    cast, so ``'1.37kg'`` becomes 1.37 and ``'2kg'`` becomes 2.0.

    Args:
        df: DataFrame with a string ``Weight`` column. Modified in place.

    Returns:
        None.
    """
    # Keep '.' as well as digits: stripping it would turn 1.37 into 137.
    numeric_text = df['Weight'].str.replace(r'[^\d.]', '', regex=True)
    df['Weight'] = numeric_text.astype(float)

# Convert Weight to a float column on the combined frame (in place).
column_weight (alldata)

In [17]:
# One-hot encode the three categorical columns and append the indicator
# columns to the right of the combined frame (the source columns are kept).
category_getdummies, gpu_getdummies, operatingsystem_getdummies = (
    pd.get_dummies(alldata[categorical])
    for categorical in ("Category", "GPU", "Operating System")
)
alldata = pd.concat(
    [alldata, category_getdummies, operatingsystem_getdummies, gpu_getdummies],
    axis=1,
)

In [19]:
# Split the processed frame back into its two parts. train was stacked
# first and test second, so the leading len(train) rows are the training set
# and the trailing len(test) rows are the test set. The sizes are read from
# the loaded frames rather than hard-coded.
new_test = alldata.tail(len(test))
new_train = alldata.head(len(train))

# SEGUNDA PARTE

In [None]:
# Looking for collinearity.
# NOTE(review): the original cell read an undefined notebook-level `df`; the
# processed training rows are presumably what was meant -- confirm.
# Only numeric columns are correlated (the frame still holds text columns),
# and the matrix is computed once and reused for both the mask and the plot.
corr = new_train.select_dtypes(include='number').corr()

plt.figure(figsize=(15, 10))

sns.set(style='white')

# Hide the upper triangle (diagonal included): it mirrors the lower one.
mask = np.triu(np.ones_like(corr, dtype=bool))

cmap = sns.diverging_palette(0, 10, as_cmap=True)

sns.heatmap(corr,
            mask=mask,
            cmap=cmap,
            center=0,
            square=True,
            annot=True,
            linewidths=0.5,
            cbar_kws={'shrink': 0.5});