## Analyzing data using Python

In [None]:
### Import libraries

In [None]:
#!pip install seaborn

In [None]:
# Requirement already satisfied; uncomment the line below to (re)install.
# !pip install scikit-learn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Load and read data from local disk

Source: https://www.kaggle.com/ionaskel/laptop-prices#laptops.csv

In [None]:
# Read the laptop listing from disk; the first CSV column becomes the row index.
laptop_data = pd.read_csv(
    'datasets/laptops.csv',
    index_col=0,
    encoding='UTF-8',
)

# Preview the first five rows.
laptop_data.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
laptop_data.shape

In [None]:
# Print each column's name, non-null count and dtype, plus memory usage.
laptop_data.info()

In [None]:
# Number of missing values in every column.
laptop_data.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
laptop_data.describe()

In [None]:
# Remove the columns that none of the cells below use; the frame is modified
# in place, so re-running this cell without reloading the data raises KeyError.
laptop_data.drop(
    columns=['Product', 'ScreenResolution', 'Cpu', 'Memory', 'Gpu', 'Weight'],
    inplace=True,
)

In [None]:
# Ten randomly chosen rows; no random_state is set, so the selection changes on every run.
laptop_data.sample(10)

In [None]:
# Distinct values found in the TypeName column.
laptop_data['TypeName'].unique()

In [None]:
# Number of rows per TypeName value, most frequent first.
laptop_data['TypeName'].value_counts()

In [None]:
# Distinct values found in the Company column.
laptop_data['Company'].unique()

In [None]:
# Number of rows per Company value, most frequent first.
laptop_data['Company'].value_counts()

In [None]:
# Bar chart of how many rows each company has, drawn on a fresh 12x8 figure.
plt.figure(figsize=(12, 8))
laptop_data['Company'].value_counts().plot.bar()

# Axis labels first, then the title; the order of these calls does not matter.
plt.xlabel('Company', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.title('Laptops by company', fontsize=15)

plt.show()

In [None]:
# Bar chart of how many rows each TypeName has, drawn on a fresh 12x8 figure.
plt.figure(figsize=(12, 8))
laptop_data['TypeName'].value_counts().plot.bar()

# Axis labels first, then the title; the order of these calls does not matter.
plt.xlabel('TypeName', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.title('Frequency of items according to TypeNames', fontsize=15)

plt.show()

In [None]:
# Box plot of the price column, drawn on a fresh 12x8 figure, to show its
# spread and outliers.
plt.figure(figsize=(12, 8))

laptop_data[['Price_euros']].boxplot()

plt.ylabel('Price (in euros)', fontsize=12)

# Render explicitly, like the other plotting cells, so the cell does not echo
# the returned Axes object as text above the chart.
plt.show()

In [None]:
# One price box per company.  The figure size is handed to pandas through
# `figsize` instead of a separate plt.figure() call.
laptop_data.boxplot(
    column=['Price_euros'],
    by='Company',
    figsize=(12, 8),
    grid=False,
)
plt.show()

In [None]:
# Swarm plot of the individual prices, one column of points per TypeName.
plt.figure(figsize=(12, 8))

sns.swarmplot(x='TypeName', y='Price_euros', data=laptop_data)

plt.title('Price distribution by type', fontsize=15)

# The x-axis carries TypeName (see the swarmplot call), not Company.
plt.xlabel('TypeName', fontsize=12)
plt.ylabel('Price (in euros)', fontsize=12)
plt.show()

In [None]:
# Price against the Inches column, with points coloured by the Ram column.
plt.figure(figsize=(12, 8))
sns.scatterplot(
    data=laptop_data,
    x='Inches',
    y='Price_euros',
    hue='Ram',
)
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the TypeName strings with integer codes (0..n_classes-1, assigned in
# sorted order of the distinct values).
label_encoder = LabelEncoder()

# NOTE(review): integer codes give the categories an arbitrary numeric order,
# which the linear model below will treat as meaningful; scikit-learn documents
# LabelEncoder as intended for target labels. Consider leaving TypeName as text
# so that get_dummies one-hot encodes it with the other columns — confirm intent.
laptop_data['TypeName'] = label_encoder.fit_transform(laptop_data['TypeName'])

In [None]:
# Preview the first five rows to check that TypeName now holds integer codes.
laptop_data.head()

In [None]:
# One-hot encode every remaining text/categorical column; numeric columns
# (including the already-encoded TypeName) are passed through unchanged.
dummy_laptop_data = pd.get_dummies(laptop_data)

# Preview the first five rows of the widened frame.
dummy_laptop_data.head()

In [None]:
# (rows, columns) after one-hot encoding.
dummy_laptop_data.shape

In [None]:
# Target: the price column.  Features: every other column of the encoded frame.
y = dummy_laptop_data['Price_euros']
X = dummy_laptop_data.drop(columns='Price_euros')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.  A fixed random_state makes the
# split, and therefore every score computed from it, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# (rows, columns) of the training and test feature sets.
X_train.shape, X_test.shape

In [None]:
# Lengths of the training and test target vectors; they should match the row counts of X_train / X_test.
y_train.shape, y_test.shape

In [None]:
from sklearn.linear_model import LinearRegression
# r2_score is not used in this cell; it is imported here for the evaluation cell further down.
from sklearn.metrics import r2_score

# Ordinary least-squares model with scikit-learn's default settings.
linear_regression = LinearRegression()

# Fit on the training split only; the test split is kept for evaluation.
linear_regression.fit(X_train, y_train)

In [None]:
# R^2 of the fitted model on the data it was trained on.
linear_regression.score(X_train, y_train)

In [None]:
# Predict prices for the held-out rows, then report R^2 against the true values.
y_pred = linear_regression.predict(X_test)

r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Actual test-set values on the x-axis against the model's predictions on the y-axis.
plt.figure(figsize=(12, 8))
plt.scatter(x=y_test, y=y_pred)

plt.ylabel('Predicted Value')
plt.xlabel('Actual Value')

plt.show()