In [14]:
#imports
!pip install requests
!pip install beautifulsoup4

import csv
from urllib.parse import quote_plus, urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup



In [110]:
# Read TAC records from a CSV file and return them as a list of (tac_code, company, model) tuples
def read_tac_data(csv_file):
    """Load TAC records from a CSV file.

    Only the first three columns of each row are used; rows with fewer than
    three columns (including blank lines) are skipped.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list of ``(tac_code, company, model)`` tuples in file order.
    """
    with open(csv_file, newline='') as handle:
        return [
            (fields[0], fields[1], fields[2])
            for fields in csv.reader(handle)
            if len(fields) >= 3
        ]

In [111]:
# Load every (TAC code, company, model) record from the TAC database CSV into tac_data.
csv_file = 'tacdb.csv'
tac_data = read_tac_data(csv_file)

In [39]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with surrounding whitespace stripped.

    Args:
        soup: Parsed page object exposing ``find``.

    Returns:
        The stripped title, or ``""`` when the lookup raises
        ``AttributeError`` (for example when ``find`` returns ``None``).
    """
    try:
        heading = soup.find("span", attrs={"id": 'productTitle'})
        return heading.text.strip()
    except AttributeError:
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the product price found in a parsed product page.

    Two lookups are tried in order: the ``<span id="priceblock_ourprice">``
    element, then the first ``<span class="a-offscreen">`` element (used when
    there is some deal price).

    Args:
        soup: Parsed page object exposing ``find``.

    Returns:
        The stripped price text of the first lookup that succeeds, or ``""``
        when neither element is present or has a single string child.
    """
    price_lookups = (
        {'id': 'priceblock_ourprice'},
        {'class': 'a-offscreen'},  # deal price fallback
    )
    for attrs in price_lookups:
        try:
            return soup.find("span", attrs=attrs).string.strip()
        except AttributeError:
            # Element missing (find -> None) or no single string child
            # (.string -> None). Only this failure is treated as "not found";
            # anything else (e.g. KeyboardInterrupt) propagates.
            continue
    return ""

# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text found in a parsed product page.

    Two lookups are tried in order: an ``<i>`` element with class
    ``a-icon a-icon-star a-star-4-5``, then the first
    ``<span class="a-icon-alt">`` element.

    Args:
        soup: Parsed page object exposing ``find``.

    Returns:
        The stripped rating text of the first lookup that succeeds, or ``""``
        when neither element is present or has a single string child.
    """
    rating_lookups = (
        # NOTE(review): this class string looks specific to one star value
        # (a-star-4-5); other ratings presumably rely on the fallback - confirm.
        ("i", {'class': 'a-icon a-icon-star a-star-4-5'}),
        ("span", {'class': 'a-icon-alt'}),
    )
    for tag_name, attrs in rating_lookups:
        try:
            return soup.find(tag_name, attrs=attrs).string.strip()
        except AttributeError:
            # Element missing or without a single string child; try the next
            # lookup. Other exceptions (e.g. KeyboardInterrupt) propagate.
            continue
    return ""

# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the review-count text found in a parsed product page.

    Looks up the ``<span id="acrCustomerReviewText">`` element and returns
    its string content with surrounding whitespace stripped.

    Args:
        soup: Parsed page object exposing ``find``.

    Returns:
        The stripped text, or ``""`` when the lookup raises
        ``AttributeError`` (element missing or no single string child).
    """
    try:
        counter = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
        return counter.string.strip()
    except AttributeError:
        return ""

# Function to extract Availability Status
def get_availability(soup):
    """Return the availability text found in a parsed product page.

    Finds the ``<div id="availability">`` element, then the first ``<span>``
    inside it, and returns that span's string content stripped of
    surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find``.

    Returns:
        The stripped text, or ``"Not Available"`` when any step of the
        lookup raises ``AttributeError``.
    """
    try:
        container = soup.find("div", attrs={'id': 'availability'})
        return container.find("span").string.strip()
    except AttributeError:
        return "Not Available"

In [104]:
def get_phone_from_tac(tac_code, csv_file='tacdb.csv'):
    """Look up the company and model registered for a TAC code.

    Scans the CSV file row by row and returns the second and third columns
    of the first row whose first column equals ``tac_code`` (string
    comparison, so leading zeros matter).

    Args:
        tac_code: TAC code to search for, as a string.
        csv_file: Path of the TAC database CSV. Defaults to ``'tacdb.csv'``.

    Returns:
        A ``(company, model)`` tuple for the first matching row, or ``None``
        when no row matches or the file cannot be read (a message is printed
        in the latter case).
    """
    try:
        with open(csv_file, newline='') as csvfile:
            for row in csv.reader(csvfile):
                # Require all three columns so a blank or truncated row cannot
                # raise IndexError and abort the scan before a valid match.
                if len(row) >= 3 and row[0] == tac_code:
                    return row[1], row[2]  # company, model
    except FileNotFoundError:
        print("TAC database file not found.")
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"An error occurred while retrieving phone information: {e}")

    return None

# Example usage: resolve a known TAC code and report what was found.
tac_code = '01294000'  # Example TAC code
phone_info = get_phone_from_tac(tac_code)
if not phone_info:
    print(f"No phone information found for TAC code {tac_code}.")
else:
    company, model = phone_info
    print(f"The phone with TAC code {tac_code} is manufactured by {company} and the model is {model}.")

The phone with TAC code 01294000 is manufactured by Apple and the model is iPhone 4s.


In [82]:
def scrape_amazon_phone_data(phone_name, timeout=10):
    """Search amazon.ca for a phone name and scrape each result's product page.

    Fetches the search results page, collects the hrefs of anchors with class
    ``a-link-normal s-no-outline``, requests every linked page and extracts
    title, price, rating, review count and availability from it.

    Args:
        phone_name: Search text, e.g. ``"Apple iPhone 4s"``.
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A DataFrame with columns ``title``, ``price``, ``rating``,
        ``reviews`` and ``availability``; rows whose title could not be
        extracted are dropped.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    # Add your user agent
    HEADERS = {'User-Agent': '', 'Accept-Language': 'en-US, en;q=0.5'}
    base_url = "https://www.amazon.ca"

    # quote_plus turns spaces into '+' and escapes characters such as '+',
    # '&' and '#', which would otherwise corrupt the query string.
    search_url = f"{base_url}/s?k={quote_plus(phone_name)}&ref=nb_sb_noss_2"

    # HTTP request for the search results page
    webpage = requests.get(search_url, headers=HEADERS, timeout=timeout)
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Result links as Tag objects
    anchors = soup.find_all("a", attrs={'class': 'a-link-normal s-no-outline'})

    # Anchors without an href yield None, which cannot be turned into a URL.
    hrefs = [href for href in (anchor.get('href') for anchor in anchors) if href]

    columns = ["title", "price", "rating", "reviews", "availability"]
    records = []

    # Visit each product page and extract its details
    for href in hrefs:
        # urljoin handles both site-relative and absolute hrefs
        product_page = requests.get(urljoin(base_url, href), headers=HEADERS, timeout=timeout)
        product_soup = BeautifulSoup(product_page.content, "html.parser")
        records.append({
            "title": get_title(product_soup),
            "price": get_price(product_soup),
            "rating": get_rating(product_soup),
            "reviews": get_review_count(product_soup),
            "availability": get_availability(product_soup),
        })

    # Passing columns keeps the schema stable even when no links were found.
    amazon_df = pd.DataFrame(records, columns=columns)

    # Assign the result back: an in-place replace on a selected column does
    # not update the parent frame under pandas Copy-on-Write.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    return amazon_df.dropna(subset=['title'])

#Testing
#phone_name = "iphone 12"
#amazon_data = scrape_amazon_phone_data(phone_name)
#amazon_data


In [100]:
def get_phone_data_from_tac(tac_code):
    """Resolve a TAC code to a phone and scrape Amazon listings for it.

    Args:
        tac_code: TAC code to look up, as a string.

    Returns:
        The DataFrame returned by ``scrape_amazon_phone_data`` for
        ``"<company> <model>"``, or ``None`` (after printing a message) when
        the TAC code is unknown or its company/model is empty.
    """
    # get_phone_from_tac returns None when nothing matches; unpacking that
    # directly would raise TypeError before the not-found branch is reached.
    phone_info = get_phone_from_tac(tac_code)
    if phone_info:
        company, model = phone_info
        if company and model:
            return scrape_amazon_phone_data(f"{company} {model}")

    print(f"No phone information found for TAC code {tac_code}.")
    return None


In [107]:
# Example usage: look up a user-supplied TAC code and show matching Amazon listings.
# For a non-interactive run, set tac_code = '01294000' (iPhone 4s) instead of calling input().
tac_code = input("Please enter the TAC code: ").strip()  # Ensure no leading or trailing whitespace
print("Entered TAC code:", tac_code)  # Debugging print statement

phone_info = get_phone_from_tac(tac_code)
if phone_info:
    company, model = phone_info
    print(f"The phone with TAC code {tac_code} is manufactured by {company} and the model is {model}.")
    amazon_data = get_phone_data_from_tac(tac_code)
    # An empty frame means the search produced no usable listings, so report
    # it as "no data" rather than printing an empty table.
    if amazon_data is not None and not amazon_data.empty:
        print(f"Amazon Data for TAC code {tac_code}:")
        print(amazon_data)
    else:
        print(f"No Amazon data found for TAC code {tac_code}.")
else:
    print(f"No phone information found for TAC code {tac_code}.")

Please enter the TAC code: 01294000
Entered TAC code: 01294000
The phone with TAC code 01294000 is manufactured by Apple and the model is iPhone 4s.
Amazon Data for TAC code 01294000:
                                                title    price  \
0   Apple iPhone 4S 16GB Unlocked GSM - Black (Ref...  $199.00   
1   iPhone 4s Cable USB Sync and Charging Cable fo...    $6.66   
2   iPhone 4S Case, iPhone 4 Cover, Jeylly Shock A...   $11.85   
3   Cbiumpro iPhone 4 Charging Cables (3 Pack 1M) ...   $12.97   
4   Case for iPhone 4, iPhone 4S Case with [2 Pack...   $17.99   
5   Wedawn iPhone 4s Cable iPad 2 Cable [Apple Cer...    $6.95   
6   Apple iPhone SE 32GB Factory Unlocked Space Gr...   $89.00   
7   30-Pin to USB Cable, iPhone 4s Cable 3.3 Ft US...    $8.99   
8   kwmobile Case Compatible with Apple iPhone 4 /...    $6.99   
9   Biss 30 Pin to USB Data Sync Charging Cable fo...    $6.49   
10  kwmobile Case Compatible with Apple iPhone 4 /...   $11.99   
11  UMIDIGI A11 Unlocked