## HTTP using Python vs. Browser

In [None]:
import requests
# Issue a GET request for the page, then show the HTTP status code and the body.
# The timeout stops the cell from hanging indefinitely if the server never responds.
r = requests.get(
    'https://github.com/splicemachine/pysplice/blob/master/README.md',
    timeout=10,
)
print(r.status_code)
print(r.text)

In [None]:
from bs4 import BeautifulSoup
# Parse the response body fetched earlier (`r`) with Python's built-in HTML parser.
soup = BeautifulSoup(r.text, 'html.parser')

# Collect every <code> element in the document and print the resulting list.
code_tags = soup.find_all('code')
print(code_tags)

## Jupyter defaults to port 8888, but can be changed
### You can't access an IP unless its port is made available

<code>jupyter notebook </code> <-- 8888<br>
<code>jupyter notebook --port=8889</code> <-- 8889

## SSH and SCP

In [None]:
!pip install -q scp
!pip install -q paramiko

In [None]:
%%writefile test.txt

my test file2

## Python equivalent of 
<code>scp -i $ex_pem test.txt $ex_host:/home/ubuntu/test.txt</code>

### Where
* ex_pem is your PEM file
* ex_host is your host IP address

[src](https://gist.github.com/batok/2352501)

In [None]:
from paramiko import SSHClient, RSAKey, AutoAddPolicy
from scp import SCPClient

# EC2 host and private key.
# pem_loc.txt holds the path of the private-key file and ec2_host.txt holds the
# host to connect to; `with` closes each file handle once it has been read.
with open('pem_loc.txt') as pem_loc_file:
    k = RSAKey.from_private_key_file(pem_loc_file.read().strip())
with open('ec2_host.txt') as host_file:
    host = host_file.read().strip()

# The context managers close the SCP channel and the SSH connection even if
# connecting or uploading raises.
with SSHClient() as ssh:
    # Add new host if unknown.
    # NOTE(review): AutoAddPolicy accepts any unknown host key on first contact;
    # acceptable for a demo, but verify host keys for anything sensitive.
    ssh.set_missing_host_key_policy(AutoAddPolicy())

    ssh.connect(host, username='ubuntu', pkey=k)

    # SCPClient takes a paramiko transport as an argument
    with SCPClient(ssh.get_transport()) as scp:
        # Upload the single file test.txt into the remote home (~) directory;
        # no recursive copy is needed because it is not a directory.
        scp.put('test.txt', remote_path='~')

## Boto3 and AWS S3

[src](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-examples.html)

In [None]:
!pip install -q boto3

In [None]:
import boto3

# Create an S3 client, then copy one object from the bucket to a local file.
s3 = boto3.client(service_name='s3')
s3.download_file(
    Bucket='splice-demo',
    Key='customers-4000.csv',
    Filename='customers.csv',
)


## Read directly with Pandas (assuming it's public)

### For private files you'll need to provide credentials

In [None]:
!pip install -q s3fs

In [None]:
import pandas as pd
# Load the CSV straight from its s3:// URL. The file is read as having no header
# row (header=None), so the column names are supplied explicitly.
df = pd.read_csv(
    's3://splice-demo/customers-4000.csv',
    names=['ID', 'First', 'Last', "Full"],
    header=None,
)
df

In [None]:
# Keep only the rows whose ID is at most 100.
df2 = df[df['ID'] <= 100]

# index=False keeps the DataFrame's row index out of the file; otherwise it is
# written as an extra unnamed first column that the source data never had.
df2.to_csv('small_customers.csv', index=False)

In [None]:
# Upload the local small_customers.csv to the bucket under the same object key.
s3.upload_file(
    Filename='small_customers.csv',
    Bucket='splice-demo',
    Key='small_customers.csv',
)