Chapter#23

In [1]:
#CREATE TABLE and INSERT

class Table : 

    def __init__(self, columns): 
        self.columns = columns 
        self.rows = []
        
    def __repr__(self): 
        """pretty representation of the table: columns then rows""" 
        return str(self.columns) + " \n " + " \n ".join(map(str, self.rows))

    def insert(self, row_values): 
        if len(row_values) != len(self.columns): 
            raise TypeError ("wrong number of elements") 
        row_dict = dict(zip(self.columns, row_values)) 
        self.rows.append(row_dict)


users = Table (["user_id", "name", "num_friends"]) 
users.insert([0, "Hero", 0]) 
users.insert([1, "Dunn", 2]) 
users.insert([2, "Sue", 3]) 
users.insert([3, "Chi", 3]) 
users.insert([4, "Thor", 3]) 
users.insert([5, "Clive", 2]) 
users.insert([6, "Hicks", 3]) 
users.insert([7, "Devin", 2]) 
users.insert([8, "Kate", 2]) 
users.insert([9, "Klein", 3]) 
users.insert([10, "Jen", 1])

print(users)

['user_id', 'name', 'num_friends'] 
 {'user_id': 0, 'name': 'Hero', 'num_friends': 0} 
 {'user_id': 1, 'name': 'Dunn', 'num_friends': 2} 
 {'user_id': 2, 'name': 'Sue', 'num_friends': 3} 
 {'user_id': 3, 'name': 'Chi', 'num_friends': 3} 
 {'user_id': 4, 'name': 'Thor', 'num_friends': 3} 
 {'user_id': 5, 'name': 'Clive', 'num_friends': 2} 
 {'user_id': 6, 'name': 'Hicks', 'num_friends': 3} 
 {'user_id': 7, 'name': 'Devin', 'num_friends': 2} 
 {'user_id': 8, 'name': 'Kate', 'num_friends': 2} 
 {'user_id': 9, 'name': 'Klein', 'num_friends': 3} 
 {'user_id': 10, 'name': 'Jen', 'num_friends': 1}


In [2]:
#update

def update(self, updates, predicate): 
    for row in self.rows: 
        if predicate(row): 
            for column, new_value in updates.iteritems(): 
                row[column] = new_value


users.update({'num_friends' : 3}, 
             lambda row: row['user_id'] == 1)

AttributeError: 'Table' object has no attribute 'update'

In [4]:
#delete

def delete(self, predicate= lambda row: True):
        """delete all rows matching predicate 
        or all rows if no predicate supplied"""
        self.rows = [row for row in self.rows if not (predicate(row))]
    users.delete( lambda row: row["user_id"] == 1) 
    users.delete()

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 7)

In [15]:
#select

def select(self, keep_columns=None, additional_columns=None): 
    if keep_columns is None: 
        keep_columns = self.columns
    if additional_columns is None: 
        additional_columns = {}

    result_table = Table(keep_columns + additional_columns.keys())

    for row in self.rows: 
        new_row = [row[column] for column in keep_columns] 
        for column_name, calculation in additional_columns.iteritems(): 
            new_row.append(calculation(row)) 
        result_table.insert(new_row) 
    return result_table

def where(self, predicate= lambda row: True): 
    """return only the rows that satisfy the supplied predicate""" 
    where_table = Table(self.columns) 
    where_table.rows = filter(predicate, self.rows) 
    return where_table


def limit(self, num_rows): 
    """return only the first num_rows rows""" 
    limit_table = Table(self.columns) 
    limit_table.rows = self.rows[:num_rows] 
    return limit_table


users.select()
users.limit(2)
users.select(keep_columns=["user_id"])

users.where( lambda row: row["name"] == "Dunn") 
     .select(keep_columns=["user_id"])

def name_length(row): return len(row["name"]): 
users.select(keep_columns=[], 
             additional_columns = { "name_length" : name_length })

IndentationError: unexpected indent (<ipython-input-15-e0ad35bfdd19>, line 37)

In [6]:
#group by

def group_by(self, group_by_columns, aggregates, having=None): 
    grouped_rows = defaultdict(list) # populate groups 
    for row in self.rows: 
        key = tuple(row[column] for column in group_by_columns) 
        grouped_rows[key].append(row) 
        
        result_table = Table(group_by_columns + aggregates.keys()) 
        
        for key, rows in grouped_rows.iteritems(): 
            if having is None or having(rows): 
                new_row = list(key) 
                for aggregate_name, aggregate_fn in aggregates.iteritems(): 
                    new_row.append(aggregate_fn(rows)) 
                    result_table.insert(new_row)
                    
        return result_table 
    
def min_user_id(rows): return min(row["user_id"] for row in rows)
stats_by_length = users \ 
    .select(additional_columns={"name_length" : name_length}) \ 
    .group_by(group_by_columns=["name_length"],
              aggregates={ "min_user_id" : min_user_id, 
                           "num_users" : len })

def first_letter_of_name(row): 
    return row["name"][0] if row["name"] else ""

def average_num_friends(rows): 
    return sum(row["num_friends"] for row in rows) / len(rows)

def enough_friends(rows): 
    return average_num_friends(rows) > 1


avg_friends_by_letter = users \ 
    .select(additional_columns={'first_letter' : first_letter_of_name}) \ 
    .group_by(group_by_columns=['first_letter'], 
              aggregates={ "avg_num_friends" : average_num_friends }, 
              having=enough_friends)


def sum_user_ids(rows): return sum(row["user_id"] for row in rows) 
user_id_sum = users \ 
    .where( lambda row: row["user_id"] > 1) \ 
    .group_by(group_by_columns=[], 
              aggregates={ "user_id_sum" : sum_user_ids })



SyntaxError: unexpected character after line continuation character (<ipython-input-6-06462b8c2630>, line 21)

In [7]:
#order by

def order_by(self, order): 
    new_table = self.select() 
    new_table.rows.sort(key=order) 
    return new_table

friendliest_letters = avg_friends_by_letter \ 
    .order_by( lambda row: -row["avg_num_friends"]) \
    .limit(4)

SyntaxError: unexpected character after line continuation character (<ipython-input-7-ce17878874e4>, line 8)

In [8]:
#join

def join(self, other_table, left_join=False): 
    join_on_columns = [c for c in self.columns 
                       if c in other_table.columns]
    
    additional_columns = [c for c in other_table.columns  
                          if c not in join_on_columns]
    
    join_table = Table(self.columns + additional_columns) 
    for row in self.rows: 
        def is_join(other_row): 
            return all(other_row[c] == row[c] for c in join_on_columns) 
        other_rows = other_table.where(is_join).rows
        
        for other_row in other_rows: 
            join_table.insert([row[c] for c in self.columns] + 
                              [other_row[c] for c in additional_columns])
            
        if left_join and not other_rows: 
            join_table.insert([row[c] for c in self.columns] +
                              
                              [None for c in additional_columns])
        return join_table

sql_users = users \ 
    .join(user_interests) \ 
    .where( lambda row: row["interest"] == "SQL") \ 
    .select(keep_columns=["name"])

SyntaxError: unexpected character after line continuation character (<ipython-input-8-44807f54a725>, line 26)

In [9]:
#subquires

likes_sql_user_ids = user_interests \ 
    .where( lambda row: row["interest"] == "SQL") \ 
    .select(keep_columns=['user_id']) 

likes_sql_user_ids.group_by(group_by_columns=[], 
                            aggregates={ "min_user_id" : min_user_id })

SyntaxError: unexpected character after line continuation character (<ipython-input-9-71929274e800>, line 3)

In [10]:
#Query Optimization

user_interests \
     .join(users) \ 
     .where( lambda row: row["interest"] == "SQL") \ 
     .select(["name"])

SyntaxError: unexpected character after line continuation character (<ipython-input-10-bdf280e763d2>, line 4)

Chapter#9

In [5]:
import sys, re

regex = sys.argv[1]

for line in sys.stdin:  

    if re.search(regex, line):       

        sys.stdout.write(line)
        
count = 0

for line in sys.stdin:   

    count += 1

# print goes to sys.stdout

print (count)

0


In [1]:
#Reading Files

# this function is only to display the txt file for reading because the 'r' is for reading. 
file_for_reading = open('test.txt', 'r')
print(file_for_reading.read())
file_for_reading.close()

hi hello


In [2]:
# this function allow to the user to write on the txt file because the 'W' is for writing. 

file_for_writing = open('writing_file.txt', 'w')
print(file_for_writing.write('\n You are write :)'))
file_for_writing.close()

# this function allow to the user to add any pharse he wants on the txt file 
#without deleting the previous data in the file.
#becausethe 'W' is for writing. 
file_for_writing = open('writing_file.txt', 'w')
print(file_for_writing.write('\n You are write :)'))
file_for_writing.close()

18
18


In [6]:
#Delimited Files

import csv  

with open('www.txt', 'rb') as f: 
    reader = csv.reader(f, delimiter= ",") 
    for row in reader: 
        date = row[0] 
        symbol = row[1] 
        closing_price = float(row[2]) 
        process(date, symbol, closing_price)
        
with open('www.txt', 'r') as f:
    reader = csv.reader(f) 
    for row in reader:
        print(row)  

Error: iterator should return strings, not bytes (did you open the file in text mode?)

In [11]:
#Scraping the Web


from bs4 import BeautifulSoup 

import requests

html = requests.get("http://www.yahoo.com").text 
soup = BeautifulSoup(html, 'html5lib')

first_paragraph_text = soup.p.text 
first_paragraph_words = soup.p.text.split()

print(first_paragraph_text)
print(first_paragraph_words)


                            Many taxpayers are complaining that their refunds are either greatly reduced, or that they now owe the IRS, in the wake of the December 2017 tax cuts.

['Many', 'taxpayers', 'are', 'complaining', 'that', 'their', 'refunds', 'are', 'either', 'greatly', 'reduced,', 'or', 'that', 'they', 'now', 'owe', 'the', 'IRS,', 'in', 'the', 'wake', 'of', 'the', 'December', '2017', 'tax', 'cuts.']


In [12]:
#APIs (JSON)

import json
serialized = """{ "title" : "Data Science Book", 
                  "author" : "Joel Grus", 
                  "publicationYear" : 2014, 
                  "topics" : [ "data", "science", "data science"] }"""

deserialized = json.loads(serialized) 
if "data science" in deserialized["topics"]: 
    print(deserialized)

{'title': 'Data Science Book', 'author': 'Joel Grus', 'publicationYear': 2014, 'topics': ['data', 'science', 'data science']}


In [13]:
#Using an Unauthenticated API

import requests , json 
endpoint = "https://api.github.com/users/joelgrus/repos" 
repos = json.loads(requests.get(endpoint).text)