## CHECKING FOR OUTLIERS IN THE DATA

In [None]:
import pandas as pd

import seaborn as sns

In [None]:
import os

# Workbook location, relative to the directory the notebook is started from.
# NOTE: this run uses our already-cleaned data; realistically, outliers are
# removed from the data while it has not yet been cleaned up.
relative_path = 'data/laptop_data.xlsx'

# Anchor the relative path to the current working directory so the workbook
# is addressed by an absolute path.
current_directory = os.getcwd()
file_path = os.path.join(current_directory, relative_path)

# Load the workbook into a DataFrame and show its (rows, columns) size.
data = pd.read_excel(file_path)
data.shape

In [None]:
# Summary statistics of the loaded data (by default pandas summarises the
# numeric columns when any are present).
data.describe()

In [None]:
# Preview the first rows of the table (pandas shows 5 by default).
data.head()

In [None]:
# Preview the last rows of the table (pandas shows 5 by default).
data.tail()

In [None]:
# Discard every row that contains at least one missing value, then show the
# size of what is left.
data = data.dropna(axis="index")
data.shape

In [None]:
# Build a per-brand summary: the mean of every numeric column, plus one
# representative value (the first row's) for each non-numeric column.
numeric_columns = data.select_dtypes(include=['number']).columns

# BRAND is the grouping key, so it becomes the index of the result and must
# not be selected as a value column.
non_numeric_columns = (
    data.select_dtypes(exclude=['number']).columns.drop("BRAND", errors="ignore")
)

brand_groups = data.groupby("BRAND")
numeric_df = brand_groups[numeric_columns].mean()
# Restrict first() to the non-numeric columns: taking it over every column
# would repeat the numeric ones and give the merged table duplicate names.
non_numeric_df = brand_groups[non_numeric_columns].first()

# Merge the two DataFrames; their columns are disjoint, so each name appears once
df = pd.concat([numeric_df, non_numeric_df], axis=1)

# Bar chart of the per-brand mean PRICE (assumes PRICE is a numeric column).
df.plot.bar(y="PRICE")

In [None]:
# Count how many rows carry each BRAND value and draw the counts as bars.
brand_counts = data["BRAND"].value_counts()
brand_counts.plot.bar()

In [None]:
# Box plot of the PRICE column of `data`, used to eyeball outliers.
# NOTE(review): the Series is passed positionally; which seaborn parameter it
# binds to (and hence the plot orientation) depends on the installed seaborn
# version — consider an explicit x= or y= keyword.
sns.boxplot(data['PRICE'])

In [None]:
# Work on the numeric part of the table only.
numeric_data = data.select_dtypes(include=['number'])
numeric_columns = numeric_data.columns

# First and third quartile of every numeric column, and the distance between
# them (the interquartile range).
Q1 = numeric_data.quantile(0.25)
Q3 = numeric_data.quantile(0.75)
IQR = Q3 - Q1

print(IQR)

In [None]:
# IQR criterion: a value is an outlier when it lies more than 1.5 * IQR below
# the first quartile or above the third quartile of its column.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# Only the columns that have quartiles can be tested, so the mask is built on
# those columns alone; the bounds are matched to the columns by label.
numeric_values = data[IQR.index]
is_outlier = (
    numeric_values.lt(lower_fence, axis="columns")
    | numeric_values.gt(upper_fence, axis="columns")
)

# Drop every row holding at least one outlying value. Rows are selected from
# `data` itself, so all columns are kept.
data_filtered = data[~is_outlier.any(axis=1)]

# Check the shape of the filtered data
print(data_filtered.shape)

In [None]:
# Box plot of PRICE after the IQR filter: plot the filtered table, not the
# original `data`, so the effect of removing outliers is visible.
sns.boxplot(data_filtered['PRICE'])

## Export the new data to your local system

In [None]:
## Export the outlier-free table (data_filtered, not the original data)
# The workbook is written to the current user's Downloads folder; building the
# path from the home directory works on both Windows and macOS without
# hard-coding a user name.
# Rename the file as needed, but keep the .xlsx extension: pandas chooses the
# Excel writer from it.
export_file_name = "laptop_data_without_outliers.xlsx"
export_path = os.path.join(os.path.expanduser("~"), "Downloads", export_file_name)

data_filtered.to_excel(export_path)

print("DATA SUCCESSFULLY EXPORTED!")