# Laptop Spec Analysis: Algorithm Efficiency and Data Analysis

#### Here, I will create and recreate a class object in python that becomes more efficient with each iteration.

In [8]:
import csv

with open("laptops.csv") as file:
    csv_reader = list(csv.reader(file))
    header = csv_reader[0]
    rows = csv_reader[1:]
    
print(header)

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']


First iteration of the Inventory Class:

In [9]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            for thing in self.rows:
                price = int(thing[-1])
                thing[-1] = price
                
                
                
                
thing = Inventory('laptops.csv')
print(thing.header)
print(len(thing.rows))
            

['Id', 'Company', 'Product', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys', 'Weight', 'Price']
1303


Second iteration of the Inventory Class:

In [10]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            for thing in self.rows:
                price = int(thing[-1])
                thing[-1] = price
                
    def get_laptop_from_id(self, laptop_id):
        for item in self.rows:
            if item[0] == laptop_id:
                return item
        return None
            

thing = Inventory('laptops.csv')
print(thing.get_laptop_from_id('3362737'))
print(thing.get_laptop_from_id('3362736'))

['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', 575]
None


Third iteration of the Inventory Class:

In [11]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            self.id_to_row = {}
            for thing in self.rows:
                price = int(thing[-1])
                thing[-1] = price
                id = thing[0]
                self.id_to_row[id] = thing
            
                
    def get_laptop_from_id(self, laptop_id):
        for item in self.rows:
            if item[0] == laptop_id:
                return item
        return None
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None

thing = Inventory('laptops.csv')
print(thing.get_laptop_from_id_fast('3362737'))
print(thing.get_laptop_from_id_fast('3362736'))

['3362737', 'HP', '250 G6', 'Notebook', '15.6', 'Full HD 1920x1080', 'Intel Core i5 7200U 2.5GHz', '8GB', '256GB SSD', 'Intel HD Graphics 620', 'No OS', '1.86kg', 575]
None


### Here I compare how long each function takes to see which is more efficient (very important when working with large data sets):

In [12]:
import time
import random

ids = [str(random.randint(1000000, 9999999)) for num in range(1,10001)]

thing = Inventory('laptops.csv')

total_time_no_dict = 0

for id in ids:
    start = time.time()
    thing.get_laptop_from_id(id)
    end = time.time()
    total_time_no_dict += (end - start)
    
total_time_dict = 0

for id in ids:
    start = time.time()
    thing.get_laptop_from_id_fast(id)
    end = time.time()
    total_time_dict += (end - start)
    
print(total_time_no_dict)
print(total_time_dict)

1.0789353847503662
0.005608558654785156


Fourth iteration of the Inventory Class:

In [13]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            self.id_to_row = {}
            for thing in self.rows:
                price = int(thing[-1])
                thing[-1] = price
                id = thing[0]
                self.id_to_row[id] = thing
            
    def get_laptop_from_id(self, laptop_id):
        for item in self.rows:
            if item[0] == laptop_id:
                return item
        return None
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            price1 = row[-1]
            if price1 == dollars:
                print(price1)
                return True
            for row in self.rows:
                price2 = row[-1]
                if price1 + price2 == dollars:
                    print(f"{price1}, {price2}")
                    return True
        return False
            
            

thing = Inventory('laptops.csv')
print(thing.check_promotion_dollars(1000))
print(thing.check_promotion_dollars(442))

745, 255
True
False


Fifth iteration of the Inventory Class:

In [14]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            self.id_to_row = {}
            self.prices = set()
            for thing in self.rows:
                price = int(thing[-1])
                thing[-1] = price
                id = thing[0]
                self.id_to_row[id] = thing
                self.prices.add(price)   
                
    def get_laptop_from_id(self, laptop_id):
        for item in self.rows:
            if item[0] == laptop_id:
                return item
        return None
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            price1 = row[-1]
            if price1 == dollars:
                return True
            for row in self.rows:
                price2 = row[-1]
                if price1 + price2 == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        for price in self.prices:
            price1 = price
            if price == dollars:
                return True
            for price in self.prices:
                price2 = price
                if price1 + price2 == dollars:
                    return True
        return False
    

thing = Inventory('laptops.csv')
print(thing.check_promotion_dollars_fast(1000))
print(thing.check_promotion_dollars_fast(442))

True
False


### Here I again compare how long each function takes to see which is more efficient:

In [15]:
prices = [random.randint(100, 5000) for num in range(1,101)]

total_time_no_set = 0

thing = Inventory('laptops.csv')

for price in prices:
    start = time.time()
    thing.check_promotion_dollars(price)
    end = time.time()
    total_time_no_set += (end - start)
    
total_time_set = 0

for price in prices:
    start = time.time()
    thing.check_promotion_dollars_fast(price)
    end = time.time()
    total_time_set += (end - start)
    

print(total_time_no_set)
print(total_time_set)

1.0492854118347168
0.21586871147155762


Sixth iteration of the Inventory Class:

In [16]:
import csv

class Inventory():
    def __init__(self, csv_filename):
        with open(csv_filename) as file:
            csv_reader = list(csv.reader(file))
            self.header = csv_reader[0]
            self.rows = csv_reader[1:]
            self.id_to_row = {}
            self.prices = set()
            self.prices_list = []
            for thing in self.rows:
                price = int(thing[-1])
                self.prices_list.append(price)
                thing[-1] = price
                id = thing[0]
                self.id_to_row[id] = thing
                self.prices.add(price)
            self.rows_by_price = sorted(self.prices_list)
            
    
                
    def get_laptop_from_id(self, laptop_id):
        for item in self.rows:
            if item[0] == laptop_id:
                return item
        return None
    
    def get_laptop_from_id_fast(self, laptop_id):
        if laptop_id in self.id_to_row:
            return self.id_to_row[laptop_id]
        return None
    
    def check_promotion_dollars(self, dollars):
        for row in self.rows:
            price1 = row[-1]
            if price1 == dollars:
                return True
            for row in self.rows:
                price2 = row[-1]
                if price1 + price2 == dollars:
                    return True
        return False
    
    def check_promotion_dollars_fast(self, dollars):
        for price in self.prices:
            price1 = price
            if price == dollars:
                return True
            for price in self.prices:
                price2 = price
                if price1 + price2 == dollars:
                    return True
        return False
    
    def find_first_laptop_more_expensive(self, price):
        for row in self.rows_by_price:
            if row > price:
                return self.rows_by_price.index(row)
        return -1
            
    
thing = Inventory('laptops.csv')
print(thing.find_first_laptop_more_expensive(1000))
print(thing.find_first_laptop_more_expensive(10000))

683
-1


# Conclusion:

The type of function used to view/sort/filter data is extremely important when dealing with large data sets. Even with this relatively small data set, there was an irrefutable difference in speed between the fast and the slow versions of each funcion.