In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', message='.*Pyarrow.*')

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Questions

In [2]:
# Load the laptop listings into the notebook-wide frame `df`.
# low_memory=False is passed through to pandas' CSV parser unchanged.
my_file = 'laptop.csv'
df = pd.read_csv(
    my_file,
    low_memory=False,
)

In [3]:
# Clean the two price columns, drop duplicate listings, and save the result.
PRICE_COLUMNS = ['discount_price', 'old_price']

for price_col in PRICE_COLUMNS:
    # Keep only digits and the decimal point (drops currency text, spaces,
    # thousands separators). The pattern is a raw string because '\d' inside
    # a plain string literal is an invalid escape sequence.
    stripped = df[price_col].astype(str).str.replace(r'[^\d.]', '', regex=True)

    # Anything that still does not parse (e.g. an empty string left over from
    # a missing value) becomes NaN. It is deliberately NOT replaced with 0:
    # a 0 would be treated as a real price and drag down min/mean and win
    # every "least expensive" lookup, whereas pandas skips NaN in those
    # aggregations.
    df[price_col] = pd.to_numeric(stripped, errors='coerce')

# Keep the first row seen for each laptop name.
# NOTE(review): this assumes 'laptop_name' identifies a listing; if the same
# name can appear with different configurations/prices, confirm that dropping
# the later rows is intended.
df = df.drop_duplicates(subset=['laptop_name'])

# Save the cleaned dataset to a NEW file so the raw input ('laptop.csv') is
# never overwritten and the notebook can be re-run from the original data.
cleaned_file_path = 'laptop_cleaned.csv'
df.to_csv(cleaned_file_path, index=False)

### 1. How many different laptop brands are there?

In [4]:
# Collect every distinct value of the 'brand' column and show them as a
# one-column table (one row per brand).
unique_brands = df['brand'].unique()
unique_brands_df = pd.DataFrame({'Unique Laptop Brands': unique_brands})
unique_brands_df

Unnamed: 0,Unique Laptop Brands
0,HP
1,Lenovo
2,Huawei
3,Dell
4,Asus
5,Apple
6,Acer
7,Microsoft
8,MSI


There are 9 unique laptop brands in the dataset (rows 0–8 in the table above).

### 2. What are the names and prices of the most and least expensive laptops?

In [5]:
def _price_extreme(frame, price_col, kind):
    """Return the laptop with the highest or lowest price in one column.

    Parameters
    ----------
    frame : pd.DataFrame
        Listings with a 'laptop_name' column and the numeric `price_col`.
    price_col : str
        Name of the price column to search.
    kind : {'max', 'min'}
        Which extreme to look up.

    Returns
    -------
    pd.Series
        The 'laptop_name' and `price_col` values of the selected row.

    Raises
    ------
    ValueError
        If `kind` is not 'max' or 'min', or if no row has a positive price.
    """
    if kind not in ('max', 'min'):
        raise ValueError(f"kind must be 'max' or 'min', got {kind!r}")

    prices = frame[price_col]
    # Missing prices may be stored as 0 (placeholder) or NaN; neither is a
    # real price, so exclude both before looking for the extreme. Without
    # this, a 0 placeholder would always be reported as the cheapest laptop.
    known_prices = prices[prices > 0]
    label = known_prices.idxmax() if kind == 'max' else known_prices.idxmin()
    return frame.loc[label, ['laptop_name', price_col]]


# Most / least expensive laptop for each of the two price columns.
most_expensive_discount = _price_extreme(df, 'discount_price', 'max')
most_expensive_old = _price_extreme(df, 'old_price', 'max')
least_expensive_discount = _price_extreme(df, 'discount_price', 'min')
least_expensive_old = _price_extreme(df, 'old_price', 'min')

# One row per (extreme, price column) combination.
summary_df = pd.DataFrame({
    'Category': ['Most Expensive (Discount Price)', 'Most Expensive (Old Price)',
                 'Least Expensive (Discount Price)', 'Least Expensive (Old Price)'],
    'Laptop Name': [most_expensive_discount['laptop_name'], most_expensive_old['laptop_name'],
                    least_expensive_discount['laptop_name'], least_expensive_old['laptop_name']],
    'Price': [most_expensive_discount['discount_price'], most_expensive_old['old_price'],
              least_expensive_discount['discount_price'], least_expensive_old['old_price']],
})
summary_df

Unnamed: 0,Category,Laptop Name,Price
0,Most Expensive (Discount Price),MacBook Pro (Retina + Touch Bar),9099.0
1,Most Expensive (Old Price),MacBook Pro (Retina + Touch Bar),10199.0
2,Least Expensive (Discount Price),IdeaPad S130-14IGM,899.0
3,Least Expensive (Old Price),Aspire 1 A114-31-C6WP,999.0


The table above shows the most and least expensive laptops, based on both the discounted price and the old (pre-discount) price.

### 3. How are laptop prices distributed?


In [9]:
# Summary statistics of the discounted price (the output of describe()),
# rounded to two decimals and shown as a one-column table.
discount_prices = df['discount_price']
price_stats = discount_prices.describe().round(2).to_frame()
price_stats

Unnamed: 0,discount_price
count,135.0
mean,3244.95
std,1770.02
min,899.0
25%,1964.0
50%,2799.0
75%,3894.0
max,9099.0


### 4. What are the min, max, and mean display sizes?

In [10]:
# Smallest, largest and mean display size, rounded to two decimals and shown
# as a one-column table.
size_summary = df['display_size'].agg(['min', 'max', 'mean'])
display_size_stats = size_summary.round(2).to_frame()
display_size_stats

Unnamed: 0,display_size
min,12.0
max,18.4
mean,14.77


These are the minimum, maximum, and mean display sizes, rounded to two decimal places.

### 5. What is the average price for each brand?

In [11]:
# Average price per brand. The discounted price is used as "the" price here.
average_price_per_brand = (
    df.groupby('brand')['discount_price']
    .mean()
    .round(2)
    .reset_index()
    # Rename by label rather than overwriting .columns positionally, so the
    # mapping stays correct even if the column order ever changes.
    .rename(columns={'brand': 'Brand', 'discount_price': 'Average Price'})
)

# NOTE(review): rounding 'display_size' is unrelated to the brand averages and
# mutates the shared frame `df`; it is kept only because later cells may rely
# on it. Consider moving it to the cleaning step.
df['display_size'] = df['display_size'].round(2)

average_price_per_brand


Unnamed: 0,Brand,Average Price
0,Acer,2608.43
1,Apple,5617.0
2,Asus,3028.52
3,Dell,3688.65
4,HP,3226.92
5,Huawei,3861.5
6,Lenovo,2186.69
7,MSI,9071.0
8,Microsoft,4999.0
