In [2]:
# Re-load the dataset after execution state reset
import pandas as pd

file_path = "./extended_smartphone_sales_report.csv"
df = pd.read_csv(file_path)

# Display basic info about the dataset.
# DataFrame.info() prints its report and returns None, so it is called as its
# own statement. Wrapping it in a tuple with df.head() made the cell output
# "(None, ...)" with a plain-text dump of the frame instead of the rich table.
df.info()
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14640 entries, 0 to 14639
Data columns (total 13 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Date                        14640 non-null  object 
 1   Product Name                14640 non-null  object 
 2   Units Sold                  14640 non-null  int64  
 3   Price                       14640 non-null  int64  
 4   Competitor Price            14640 non-null  int64  
 5   Stock Available             14640 non-null  int64  
 6   Marketing Spend             14640 non-null  int64  
 7   Holiday/Seasonal Indicator  14640 non-null  int64  
 8   Weather Condition           14640 non-null  object 
 9   Economic Indicator          14640 non-null  float64
 10  Social Media Trend Score    14640 non-null  int64  
 11  Market Sentiment Score      14640 non-null  float64
 12  Competitor Activity Score   14640 non-null  int64  
dtypes: float64(2), int64(8), object

(None,
          Date        Product Name  Units Sold  Price  Competitor Price  \
 0  2023-04-01           iPhone 14           6    906               880   
 1  2023-04-01       iPhone 14 Pro          24    968              1002   
 2  2023-04-01           iPhone 15          10   1268              1224   
 3  2023-04-01       iPhone 15 Pro          11    659               614   
 4  2023-04-01  Samsung Galaxy S22          24   1234              1316   
 
    Stock Available  Marketing Spend  Holiday/Seasonal Indicator  \
 0              181              278                           1   
 1              302              171                           1   
 2              237              103                           1   
 3              489             1443                           1   
 4              242              498                           1   
 
   Weather Condition  Economic Indicator  Social Media Trend Score  \
 0            Cloudy            0.931082                     

In [3]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Convert Date column to datetime format
df["Date"] = pd.to_datetime(df["Date"])

# Extract date-based features
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month
df["Day"] = df["Date"].dt.day
df["Weekday"] = df["Date"].dt.weekday

# Encode categorical variables.
# Every column gets its own fitted encoder: refitting a single shared
# LabelEncoder throws away the classes learned for the previous column, so the
# integer product codes could no longer be mapped back to product names with
# inverse_transform().
categorical_cols = ["Product Name", "Weather Condition"]
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Kept for backward compatibility: the encoder that was fitted last.
label_encoder = label_encoders["Weather Condition"]

# Normalize numerical columns to the [0, 1] range
scaler = MinMaxScaler()
num_cols = ["Price", "Competitor Price", "Stock Available", "Marketing Spend",
            "Economic Indicator", "Social Media Trend Score", "Market Sentiment Score", "Competitor Activity Score"]
df[num_cols] = scaler.fit_transform(df[num_cols])

# Drop original Date column as we have extracted features
# (reassignment instead of inplace=True keeps the step explicit)
df = df.drop(columns=["Date"])

# Display the processed dataset
df.head()


Unnamed: 0,Product Name,Units Sold,Price,Competitor Price,Stock Available,Marketing Spend,Holiday/Seasonal Indicator,Weather Condition,Economic Indicator,Social Media Trend Score,Market Sentiment Score,Competitor Activity Score,Year,Month,Day,Weekday
0,16,6,0.505421,0.466865,0.291759,0.127234,1,0,0.431125,0.656566,0.447599,0.444444,2023,4,1,5
1,17,24,0.557131,0.557707,0.561247,0.050751,1,0,0.532223,0.070707,0.401566,0.666667,2023,4,1,5
2,18,10,0.807339,0.723008,0.416481,0.002144,1,0,0.10041,0.141414,0.960145,0.555556,2023,4,1,5
3,19,11,0.299416,0.268801,0.977728,0.959971,1,0,0.549434,0.212121,0.316983,0.555556,2023,4,1,5
4,8,24,0.778982,0.791512,0.427617,0.284489,1,1,0.52875,0.979798,0.525918,0.222222,2023,4,1,5


In [4]:
# Persist the processed frame; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="preprocessed_smartphone_sales.csv", index=False)

In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Daily date range covering three calendar years. 2024 is a leap year, so the
# range holds 1096 days (not 1095).
start_date = datetime(2022, 1, 1)
end_date = datetime(2024, 12, 31)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Fixed seed so that re-running this cell regenerates exactly the same CSV.
SEED = 42

# Smartphone models with specifications: (name, RAM, storage)
smartphones = [
    ("iPhone 14", "6GB", "128GB"),
    ("iPhone 15", "6GB", "256GB"),
    ("Samsung Galaxy S22", "8GB", "128GB"),
    ("Samsung Galaxy S23", "8GB", "256GB"),
    ("Google Pixel 7", "8GB", "128GB"),
    ("Google Pixel 8", "8GB", "256GB"),
    ("OnePlus 10 Pro", "12GB", "256GB"),
    ("OnePlus 11", "16GB", "512GB"),
    ("Xiaomi Mi 12", "8GB", "128GB"),
    ("Xiaomi Mi 13", "12GB", "256GB"),
    ("Oppo Find X5", "8GB", "128GB"),
    ("Oppo Find X6", "12GB", "512GB")
]

# Column order of the generated dataset
columns = ["Date", "Product Name", "RAM", "Memory", "Units Sold", "Price (USD)",
           "Competitor Price", "Stock Available", "Marketing Spend", "Holiday/Season Indicator",
           "Weather Condition", "Economic Indicator", "Social Media Trend Score",
           "Market Sentiment Score", "Competitor Activity Score"]


def generate_sales_data(dates, models, seed=None):
    """Build a synthetic sales table with one row per (day, model) pair.

    Parameters
    ----------
    dates : sequence of datetime-like
        Days to generate rows for.
    models : sequence of (name, ram, storage) tuples
        Products to generate rows for; every model appears once per day,
        in the given order.
    seed : int or None
        Seed for the NumPy random generator. The same seed and inputs
        always produce the same table; None draws fresh entropy.

    Returns
    -------
    pandas.DataFrame
        len(dates) * len(models) rows with the columns listed in ``columns``.
    """
    rng = np.random.default_rng(seed)
    dates = pd.DatetimeIndex(dates)
    n_days = len(dates)
    n_rows = n_days * len(models)

    # Day-major layout: each date is repeated once per model, and the model
    # list is cycled once per date.
    row_dates = dates.repeat(len(models))

    # Upper bounds of integers() are exclusive, matching np.random.randint.
    price = rng.integers(600, 1500, size=n_rows)

    values = {
        "Date": row_dates,
        "Product Name": [model[0] for model in models] * n_days,
        "RAM": [model[1] for model in models] * n_days,
        "Memory": [model[2] for model in models] * n_days,
        "Units Sold": rng.integers(5, 200, size=n_rows),
        "Price (USD)": price,
        # Competitor price stays within [-50, 50) of our own price
        "Competitor Price": price + rng.integers(-50, 50, size=n_rows),
        "Stock Available": rng.integers(50, 500, size=n_rows),
        "Marketing Spend": rng.integers(1000, 20000, size=n_rows),
        # November and December count as the holiday season
        "Holiday/Season Indicator": row_dates.month.isin([11, 12]).astype(int),
        "Weather Condition": rng.choice([0, 1], size=n_rows, p=[0.85, 0.15]),
        "Economic Indicator": rng.uniform(0.8, 1.2, size=n_rows),
        "Social Media Trend Score": rng.uniform(0.5, 1.5, size=n_rows),
        "Market Sentiment Score": rng.uniform(0.5, 1.5, size=n_rows),
        "Competitor Activity Score": rng.uniform(0.5, 1.5, size=n_rows),
    }
    return pd.DataFrame(values, columns=columns)


# Create DataFrame
df = generate_sales_data(date_range, smartphones, seed=SEED)

# Save to CSV
df.to_csv("smartphone_sales_3_years.csv", index=False)
print("Dataset saved as smartphone_sales_3_years.csv")


Dataset saved as smartphone_sales_3_years.csv


In [1]:
# Re-load the dataset after execution state reset
import pandas as pd

# Location of the generated three-year sales file, relative to the notebook.
# NOTE(review): the recorded run of this cell raised FileNotFoundError for this
# path, and a later cell reads "../data/smartphone_sales_3_years.csv" - confirm
# which location is the intended one.
file_path = "./smartphone_sales_3_years.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

FileNotFoundError: [Errno 2] No such file or directory: './smartphone_sales_3_years.csv'

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load dataset
df = pd.read_csv("./smartphone_sales_3_years.csv")

# Convert 'Date' column to datetime format
df["Date"] = pd.to_datetime(df["Date"])

# Handle missing values
for col in df.select_dtypes(include=["number"]).columns:
    df[col] = df[col].fillna(df[col].median())  # Fill missing numerical values with median

for col in df.select_dtypes(include=["object"]).columns:
    df[col] = df[col].fillna(df[col].mode()[0])  # Fill missing categorical values with mode

# Remove duplicates
df = df.drop_duplicates()

# Remove outliers using IQR method.
# Only columns with more than two distinct values are screened. For a 0/1
# indicator whose minority class covers fewer than 25% of the rows, Q1 == Q3,
# so IQR == 0 and every minority row (e.g. every holiday-season row) would be
# discarded as an "outlier".
num_cols = df.select_dtypes(include=["number"]).columns
continuous_cols = [col for col in num_cols if df[col].nunique() > 2]
Q1 = df[continuous_cols].quantile(0.25)
Q3 = df[continuous_cols].quantile(0.75)
IQR = Q3 - Q1
is_outlier = (
    (df[continuous_cols] < (Q1 - 1.5 * IQR)) | (df[continuous_cols] > (Q3 + 1.5 * IQR))
).any(axis=1)
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the unfiltered frame.
df = df[~is_outlier].copy()

# Normalize numerical features using Min-Max Scaling
scaler = MinMaxScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

# Encode categorical variables using One-Hot Encoding
df = pd.get_dummies(df, columns=["Product Name", "RAM", "Memory"], drop_first=True)

# Save preprocessed dataset
df.to_csv("preprocessed_smartphone_sales.csv", index=False)
print("Preprocessed dataset saved as preprocessed_smartphone_sales.csv")

Preprocessed dataset saved as preprocessed_smartphone_sales.csv


In [12]:
import pandas as pd

# Read the raw three-year sales file from the data directory one level up
file_path = "../data/smartphone_sales_3_years.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Date,Product Name,RAM,Memory,Units Sold,Price (USD),Competitor Price,Stock Available,Marketing Spend,Holiday/Season Indicator,Weather Condition,Economic Indicator,Social Media Trend Score,Market Sentiment Score,Competitor Activity Score
0,2022-01-01,iPhone 14,6GB,128GB,121,1229,1191,216,7196,0,0,1.141706,0.665271,0.943892,0.611567
1,2022-01-01,iPhone 15,6GB,256GB,9,1147,1176,292,12616,0,0,0.963156,1.38148,1.470195,1.177919
2,2022-01-01,Samsung Galaxy S22,8GB,128GB,164,874,872,401,10040,0,0,1.056488,1.089368,0.907426,1.382127
3,2022-01-01,Samsung Galaxy S23,8GB,256GB,40,628,586,401,5343,0,0,0.847789,0.520334,1.11403,0.927781
4,2022-01-01,Google Pixel 7,8GB,128GB,182,1360,1337,352,10026,0,0,1.030673,0.642926,1.049445,0.683519


In [24]:
pip install google-search-results

Collecting google-search-results
  Using cached google_search_results-2.4.2-py3-none-any.whl
Installing collected packages: google-search-results
Successfully installed google-search-results-2.4.2
Note: you may need to restart the kernel to use updated packages.


In [64]:
from serpapi import GoogleSearch

import os

# The SerpApi key is read from the SERPAPI_API_KEY environment variable so the
# secret is never stored in the notebook. The key that was previously
# hard-coded here should be considered leaked and be rotated.
params = {
  "engine": "google_trends",
  "q": "Apple iPhone 14 128GB",
  "data_type": "TIMESERIES",
  "api_key": os.environ["SERPAPI_API_KEY"]
}

search = GoogleSearch(params)
results = search.get_dict()
interest_over_time = results['interest_over_time']
timeline_data = interest_over_time['timeline_data']
print(timeline_data)

# Show the interest score of the first time bucket only (nothing is printed
# when the timeline is empty). Each entry carries the score twice: 'value' is
# the display string and 'extracted_value' the parsed number; the numeric one
# is used here.
for point in timeline_data[:1]:
    extracted_value = point['values'][0]['extracted_value']
    print(extracted_value)


[{'date': 'Apr 7\u2009–\u200913, 2024', 'timestamp': '1712448000', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '41', 'extracted_value': 41}]}, {'date': 'Apr 14\u2009–\u200920, 2024', 'timestamp': '1713052800', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '44', 'extracted_value': 44}]}, {'date': 'Apr 21\u2009–\u200927, 2024', 'timestamp': '1713657600', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '46', 'extracted_value': 46}]}, {'date': 'Apr 28\u2009–\u2009May 4, 2024', 'timestamp': '1714262400', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '37', 'extracted_value': 37}]}, {'date': 'May 5\u2009–\u200911, 2024', 'timestamp': '1714867200', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '52', 'extracted_value': 52}]}, {'date': 'May 12\u2009–\u200918, 2024', 'timestamp': '1715472000', 'values': [{'query': 'Apple iPhone 14 128GB', 'value': '37', 'extracted_value': 37}]}, {'date': 'May 19\u2009–\u200925, 2024', 'timestamp': '1716076800', 'values

In [38]:
print(results["interest_over_time"])

{'timeline_data': [{'date': 'Apr 7\u2009–\u200913, 2024', 'timestamp': '1712448000', 'values': [{'query': 'Samsung Galaxy S22', 'value': '85', 'extracted_value': 85}]}, {'date': 'Apr 14\u2009–\u200920, 2024', 'timestamp': '1713052800', 'values': [{'query': 'Samsung Galaxy S22', 'value': '90', 'extracted_value': 90}]}, {'date': 'Apr 21\u2009–\u200927, 2024', 'timestamp': '1713657600', 'values': [{'query': 'Samsung Galaxy S22', 'value': '97', 'extracted_value': 97}]}, {'date': 'Apr 28\u2009–\u2009May 4, 2024', 'timestamp': '1714262400', 'values': [{'query': 'Samsung Galaxy S22', 'value': '93', 'extracted_value': 93}]}, {'date': 'May 5\u2009–\u200911, 2024', 'timestamp': '1714867200', 'values': [{'query': 'Samsung Galaxy S22', 'value': '86', 'extracted_value': 86}]}, {'date': 'May 12\u2009–\u200918, 2024', 'timestamp': '1715472000', 'values': [{'query': 'Samsung Galaxy S22', 'value': '90', 'extracted_value': 90}]}, {'date': 'May 19\u2009–\u200925, 2024', 'timestamp': '1716076800', 'values

In [70]:
from serpapi import GoogleSearch

import os

# The SerpApi key is read from the SERPAPI_API_KEY environment variable so the
# secret is never stored in the notebook. The key that was previously
# hard-coded here should be considered leaked and be rotated.
params = {
  "engine": "google_shopping",
  "q": "iPhone 15 6+256",  # Your search query
  "gl": "us",
  "hl": "en",
  "api_key": os.environ["SERPAPI_API_KEY"]
}

search = GoogleSearch(params)
results = search.get_dict()

# Display the first shopping result (nothing is printed when there are none)
shopping_results = results.get("shopping_results", [])
for result in shopping_results[:1]:
    print("Title:", result.get("title"))
    print("Product ID:", result.get("product_id"))  # needed to look up the product's reviews
    print("Price:", result.get("price"))
    print("Link:", result.get("link"))


Title: Apple iPhone 15
Product ID: 12455355607861195108
Price: $666.38
Link: None


In [71]:
### Feedback

from serpapi import GoogleSearch

import os

# The SerpApi key is read from the SERPAPI_API_KEY environment variable so the
# secret is never stored in the notebook. The key that was previously
# hard-coded here should be considered leaked and be rotated.
API_KEY = os.environ["SERPAPI_API_KEY"]

# Step 1: search Google Shopping to find the product id
shopping_params = {
  "engine": "google_shopping",
  "q": "iPhone 15",
  "gl": "us",
  "hl": "en",
  "api_key": API_KEY
}

# Bug fix: the request must use shopping_params. The original passed `params`,
# a variable that only exists if an earlier cell defined it (and then carries
# that cell's query), so this cell failed or searched for the wrong thing.
search = GoogleSearch(shopping_params)
results = search.get_dict()

# Take the product id of the first shopping hit, if there is one
shopping_results = results.get("shopping_results", [])
product_id = shopping_results[0].get("product_id") if shopping_results else ""
if not product_id:
    # Fail here with a clear message instead of sending a product lookup
    # with an empty id.
    raise ValueError(
        "No product_id found in the shopping results for query "
        + repr(shopping_params["q"])
    )

# Step 2: fetch the reviews of that product
params = {
  "engine": "google_product",
  "product_id": product_id,
  "reviews": "1",
  "gl": "us",
  "hl": "en",
  "api_key": API_KEY
}

search = GoogleSearch(params)
results = search.get_dict()
reviews_results = results["reviews_results"]
review_array = reviews_results
content = review_array['reviews']

# Print every review text, separated by a divider line
for review in content:
    print(review['content'])
    print("------")

The Iphone 15 is a disappointment in performance and durability over my old Galaxy A51. The old A51 is a 4G phone that runs the Android OS. It did nothing special but was durable with no screen protector and a simple Incipio 2 piece case. After having the A51 for a few years I decided to upgrade because my employer uses Apple products to communicate. I was also waiting for Apple to stop using proprietary charging cords which seems wasteful and environmentally unfriendly.

I paid extra for shipping which is insulting after a one thousand dollar purchase. The phone arrived and I couldn't connect it to the 5G service or make calls. I had to wait on hold on my old phone as a customer service rep connected new my phone to the Verizon network. Once up and running I put on the same model Incipio case for the Iphone as I had used for the A51. No issues but the Incipio case doesn't protect the camera lenses very well. I had screen protectors ordered but they arrived 2 days later than the phone.