In [1]:
# Have the notebook autosave every 5 seconds.
%autosave 5

Autosaving every 5 seconds


In [2]:
import numpy as np
import pandas as pd

In [3]:
# Use the %pip magic so the packages are installed into the environment of the
# running kernel, and pin the versions this notebook was executed with.
%pip install httpx==0.23.3 codetiming==1.4.0

Collecting httpx
  Downloading httpx-0.23.3-py3-none-any.whl (71 kB)
Collecting codetiming
  Downloading codetiming-1.4.0-py3-none-any.whl (7.2 kB)
Collecting rfc3986[idna2008]<2,>=1.3
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)
Collecting httpcore<0.17.0,>=0.15.0
  Downloading httpcore-0.16.3-py3-none-any.whl (69 kB)
Installing collected packages: rfc3986, httpcore, httpx, codetiming
Successfully installed codetiming-1.4.0 httpcore-0.16.3 httpx-0.23.3 rfc3986-1.5.0


In [42]:
# Read the website list from CSV; no index column is requested, so the rows
# are labelled with pandas' default integer index.
df = pd.read_csv('popular_websites.csv')
df

Unnamed: 0,name,url,total_views
0,Google,https://www.google.com,520726800000.0
1,YouTube,https://www.youtube.com,235813200000.0
2,Facebook,https://www.facebook.com,223015700000.0
3,Yahoo,https://www.yahoo.com,125654400000.0
4,Wikipedia,https://www.wikipedia.org,44673640000.0
5,Baidu,https://www.baidu.com,44097590000.0
6,Twitter,https://twitter.com,30986760000.0
7,Yandex,https://yandex.com,28579800000.0
8,Instagram,https://www.instagram.com,26215200000.0
9,AOL,https://www.aol.com,23212320000.0


In [43]:
import httpx

In [46]:
def check_connection(name, url):
    """Check whether ``url`` can be reached and print a one-line status.

    A single GET request is sent with ``httpx.get``. If the response carries a
    ``Location`` header that does not start with ``url``, the site is reported
    as redirecting elsewhere.

    Args:
        name: Display name used in the status message.
        url: Address to request.

    Returns:
        True if a response was received, False if no connection could be
        established (connection error or connection timeout).
    """
    try:
        response = httpx.get(url)
    except (httpx.ConnectError, httpx.ConnectTimeout):
        # A host that never answers raises ConnectTimeout rather than
        # ConnectError; treat both as "could not connect" so one unresponsive
        # site does not abort a whole batch of checks.
        print(f'Failed to establish a connection with {url}')
        return False

    # NOTE: relies on httpx not following redirects by default, so a redirect
    # response still exposes its Location header here.
    location = response.headers.get('location')
    if location is None or location.startswith(url):
        print(f'{name} is online')
    else:
        print(f'{name} is online! But redirects to {location}')
    return True

In [47]:
# Walk the frame as named tuples and probe each site in turn.
for site in df.itertuples():
    check_connection(name=site.name, url=site.url)

Google is online
YouTube is online
Facebook is online
Yahoo is online
Wikipedia is online
Baidu is online
Twitter is online
Yandex is online
Instagram is online
AOL is online
Netscape is online! But redirects to https://www.aol.com/
Failed to establish a connection with https://alwaysfails.example.com


In [48]:
# Same check, this time iterating over (index, Series) pairs.
for _, site in df.iterrows():
    site_name, site_url = site['name'], site['url']
    check_connection(site_name, site_url)

Google is online
YouTube is online
Facebook is online
Yahoo is online
Wikipedia is online
Baidu is online
Twitter is online
Yandex is online
Instagram is online
AOL is online
Netscape is online! But redirects to https://www.aol.com/
Failed to establish a connection with https://alwaysfails.example.com


In [49]:
# zip() pairs up the items of both lists position by position.
a = [1, 2, 3]
b = [4, 5, 6]
for pair in zip(a, b):
    print(*pair)

1 4
2 5
3 6


In [50]:
# Reload the same CSV, this time using its first column as the row index
# (the displayed frame is indexed by `name`).
websites = pd.read_csv('popular_websites.csv', index_col=0)
websites

Unnamed: 0_level_0,url,total_views
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Google,https://www.google.com,520726800000.0
YouTube,https://www.youtube.com,235813200000.0
Facebook,https://www.facebook.com,223015700000.0
Yahoo,https://www.yahoo.com,125654400000.0
Wikipedia,https://www.wikipedia.org,44673640000.0
Baidu,https://www.baidu.com,44097590000.0
Twitter,https://twitter.com,30986760000.0
Yandex,https://yandex.com,28579800000.0
Instagram,https://www.instagram.com,26215200000.0
AOL,https://www.aol.com,23212320000.0


In [51]:
# Accumulate the view counts one row at a time.
total = 0
for row in websites.itertuples():
    total = total + row.total_views

total

1302981160000.0

In [53]:
# Same total via the built-in sum() over a generator of per-row view counts.
sum(
    row.total_views
    for row in websites.itertuples()
)

1302981160000.0

In [54]:
# Vectorized alternative: let pandas add up the whole column in one call.
websites['total_views'].sum()

1302981160000.0

In [55]:
from codetiming import Timer

In [59]:
def loop_sum(websites):
    """Add up the ``total_views`` column with an explicit Python ``for`` loop.

    Args:
        websites: DataFrame with a ``total_views`` column.

    Returns:
        The sum of ``total_views`` over all rows (``0`` for an empty frame).
    """
    running_total = 0
    for row in websites.itertuples():
        running_total = running_total + row.total_views
    return running_total

In [60]:
def python_sum(websites):
    """Add up the ``total_views`` column with the built-in ``sum()``.

    Args:
        websites: DataFrame with a ``total_views`` column.

    Returns:
        The sum of ``total_views`` over all rows (``0`` for an empty frame).
    """
    rows = websites.itertuples()
    return sum(row.total_views for row in rows)

In [61]:
def pandas_sum(websites):
    """Add up the ``total_views`` column with pandas' own ``Series.sum()``.

    Args:
        websites: DataFrame with a ``total_views`` column.

    Returns:
        The sum of the ``total_views`` column.
    """
    view_counts = websites["total_views"]
    return view_counts.sum()

In [65]:
# Time each summing strategy once; the CSV is re-read before every timed call,
# outside the timed region.
for func in (loop_sum, python_sum, pandas_sum):
    websites = pd.read_csv("popular_websites.csv", index_col=0)
    with Timer(text="{name:20}: {milliseconds:.2f} ms", name=func.__name__):
        func(websites)

loop_sum            : 0.81 ms
python_sum          : 0.82 ms
pandas_sum          : 0.53 ms


In [69]:
# Repeat the timing on the frame stacked 1000 times; pandas_sum is timed first here.
for func in (pandas_sum, loop_sum, python_sum):
    websites = pd.read_csv("popular_websites.csv", index_col=0)
    websites = pd.concat([websites] * 1000)
    with Timer(text="{name:20}: {milliseconds:.2f} ms", name=func.__name__):
        func(websites)

pandas_sum          : 0.47 ms
loop_sum            : 17.57 ms
python_sum          : 18.01 ms


In [103]:
# Load the monthly product data (columns shown: month, sales, unit_price).
# NOTE(review): this rebinds `df`, which previously held the websites frame.
df = pd.read_csv('products.csv')
df

Unnamed: 0,month,sales,unit_price
0,january,3,0.5
1,february,2,0.53
2,march,5,0.55
3,april,10,0.71
4,may,8,0.66


In [105]:
def loop_sumsum(products):
    """Compute the cumulative income row by row with a Python loop.

    The income of a row is ``sales * unit_price``; each entry of the new
    column is that income added to the running total of the rows before it.

    Args:
        products: DataFrame with ``sales`` and ``unit_price`` columns.

    Returns:
        A new DataFrame equal to ``products`` plus a ``cumulative_income``
        column.
    """
    running_totals = []
    for row in products.itertuples():
        income = row.sales * row.unit_price
        # The first row starts the series; later rows build on the last total.
        running_totals.append(
            running_totals[-1] + income if running_totals else income
        )
    return products.assign(cumulative_income=running_totals)

In [108]:
def pandas_cumsum(products):
    """Compute the cumulative income with vectorized pandas operations.

    A temporary ``income`` column (``sales * unit_price``) is added, its
    running sum is stored as ``cumulative_income``, and the temporary column
    is dropped again.

    Args:
        products: DataFrame with ``sales`` and ``unit_price`` columns.

    Returns:
        A new DataFrame with a ``cumulative_income`` column and without an
        ``income`` column.
    """
    def _income(frame):
        return frame['sales'] * frame['unit_price']

    def _running_income(frame):
        # Evaluated after `income` has been assigned within the same call.
        return frame['income'].cumsum()

    with_income = products.assign(
        income=_income,
        cumulative_income=_running_income,
    )
    return with_income.drop(columns='income')

In [111]:
# Time the loop-based and the vectorized cumulative income, re-reading the CSV
# before each timed call.
for func in (loop_sumsum, pandas_cumsum):
    products = pd.read_csv('products.csv')
    with Timer(text="{name:20}: {milliseconds:.2f} ms", name=func.__name__):
        func(products)

loop_sumsum         : 1.96 ms
pandas_cumsum       : 3.71 ms
