# **Building a Smart Data Aggregator**

## *Assignment 2*

#### **Part 1:** User Data Processing with Lists

##### **-->** Filtering Users and Extracting their names

In [1]:
def filter_users(data):
    names = [i[1] for i in data if i[2]>30 and (i[3]=='Canada' or i[3]=='USA')]
    return names

##### --> Returning top 10 oldest users

In [2]:
def top_oldest(data):
    top = sorted(data, key = lambda i: i[2], reverse = True)[:10]
    return top

##### **-->** Returning Duplicate names

In [3]:
def duplicates(data):
    setcheck = set(); listdup = []
    for i in data:
        if i[1] not in setcheck:
            setcheck.add(i[1])
        else:
            listdup.append(i[1])
    return set(listdup)
    

### **Implementation 1**

In [4]:
data = [(1, 'Ali', 35, 'Pakistan'),
    (2, 'Babar', 28, 'India'),
    (3, 'Amna', 32, 'Bangladesh'),
    (4, 'Asma', 40, 'Korea'),
    (5, 'Esha', 22, 'Japan'),
    (6, 'Esha', 50, 'Pakistan'),]

top = top_oldest(data)
print("Top 10 oldest users: ", top)

dup = duplicates(data)
print("\nDuplicate names: ", dup)


Top 10 oldest users:  [(6, 'Esha', 50, 'Pakistan'), (4, 'Asma', 40, 'Korea'), (1, 'Ali', 35, 'Pakistan'), (3, 'Amna', 32, 'Bangladesh'), (2, 'Babar', 28, 'India'), (5, 'Esha', 22, 'Japan')]

Duplicate names:  {'Esha'}


#### **Part 2:** Immutable Data Management with Tuples

##### **-->** Unique users and integrity

In [5]:
def unique(data2):
    users = [i[1] for i in data2]
    return len(set(users))

#Tuples are immutable, so python itself won't allow any modification. Also we ain't modifying anything in our function so data will remain integrated

##### **-->** Returning Transaction with highest amout

In [6]:
def highest(data2):
    h = sorted(data2, key = lambda i: i[2], reverse = True)[0]
    return h

##### **-->** Returning transaction ids and user ids

In [7]:
def ids(data2):
    trans = [i[0] for i in data2]
    users = [i[1] for i in data2]
    return trans, users

#If tuple size varies, it might cause error because I we using indexing to extract data

### **Implementation 2**

In [8]:
t = [(101, 1, 250, '10:00'),
    (102, 2, 450, '05:00'),
    (103, 1, 120, '10:10'),
    (104, 3, 800, '01:15'),]

high = highest(t)
print("Transaction with highest amount: ", high)

t_ids, u_ids = ids(t)
print("Transaction IDs:", t_ids)
print("User IDs:", u_ids)

Transaction with highest amount:  (104, 3, 800, '01:15')
Transaction IDs: [101, 102, 103, 104]
User IDs: [1, 2, 1, 3]


#### **Part 3:** Unique Data Handling with Sets

##### **-->** Return who Visited A and B

In [9]:
def AandB(data3a, data3b):
    return data3a.intersection(data3b)

##### **-->** Returning who visited either A or C

In [10]:
def AorC(data3a, data3c):
    return data3a.symmetric_difference(data3c)

##### **-->** Updating Page a

In [11]:
def update(data3a, new):
    data3a.update(new)

##### **-->** Removing list of user ids

In [12]:
def remove(data3b, list):
    data3b.difference_update(list)

### **Implementation 3**

In [13]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}
c = {5, 6, 8, 9}
new = {6, 7, 10}
rem = [4, 5]

update(a, new)
print("Updated Page: ", a)

remove(b, rem)
print("After Removal: ", b)

Updated Page:  {1, 2, 3, 4, 5, 6, 7, 10}
After Removal:  {3, 6, 7}


#### **Part 4:** Data Aggregation with Dictionaries

##### **-->** Filtering with rating 4+

In [14]:
def filter(data4):
    new = {}
    for i,j in data4.items():
        if j['rating'] >= 4:
            new[i] = j['ratings']
    return new

##### **-->** Sort by ratings

In [15]:
def sorting(new):
    top = sorted(new.items(), lambda i: i[1], reverse = True)[:5]
    return top

##### **-->** Combining Multiple Dictionaries

In [16]:
def combine(data4_list):
    combined = {}
    for i in data4_list:
        for j,k in i.items():
            if j in combined:
                combined[j]['rating'] = max(combined[j]['rating'], k['rating'])
                combined[j]['comments'].append(k['comments'])
            else:
                combined[j] = {'rating': k['rating'],
                              'comments': [k['comments']]}
    return combined
        

##### **-->** Getting ratings > 3 by Dict comprehension

In [17]:
def greater3(combined):
    greater = {i: j['rating'] for i,j in combined.items() if j['rating'] > 3}
    return greater

#### **Implementation 4**

In [18]:
f1 = {1: {'rating': 5, 'comments': ['comment 1']},
    2: {'rating': 3, 'comments': ['comment 2']},
    3: {'rating': 4, 'comments': ['comment 3']}}

f2 = {1: {'rating': 4, 'comments': ['comment 4']},
    3: {'rating': 2, 'comments': ['comment 5']},
    4: {'rating': 5, 'comments': ['comment 6']}}

comb = combine([f1, f2])
print("Combined Feedback: ", comb)

gr = greater3(comb)
print("\nFeedback with 3+ ratings: ", gr)


Combined Feedback:  {1: {'rating': 5, 'comments': [['comment 1'], ['comment 4']]}, 2: {'rating': 3, 'comments': [['comment 2']]}, 3: {'rating': 4, 'comments': [['comment 3'], ['comment 5']]}, 4: {'rating': 5, 'comments': [['comment 6']]}}

Feedback with 3+ ratings:  {1: 5, 3: 4, 4: 5}
