In [10]:
import boto3
import pandas as pd
import os
from dotenv import load_dotenv
# Populate the process environment via python-dotenv so the os.getenv(...)
# lookups for the AWS credentials further down can resolve.
load_dotenv()

True

## Importing data from S3 through Boto3

In [11]:
# Build the S3 client; both credentials are read from environment variables
# rather than being written into the notebook.
s3 = boto3.client(
    's3',
    aws_access_key_id=os.environ.get('ACCESS_KEY'),
    aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
)

In [12]:
def get_s3_object(s3_bucket, s3_object, client=None):
    """Fetch one object from S3 and return the raw ``get_object`` response.

    Args:
        s3_bucket: Bucket name, forwarded as the ``Bucket`` argument.
        s3_object: Object key inside the bucket, forwarded as ``Key``.
        client: Optional object exposing ``get_object(Bucket=..., Key=...)``.
            When omitted, the notebook-level ``s3`` client is used, so
            existing two-argument calls behave exactly as before.

    Returns:
        Whatever ``client.get_object`` returns. The notebook reads the
        payload from the response's ``'Body'`` entry.
    """
    if client is None:
        # Fall back to the client created earlier in the notebook.
        client = s3
    return client.get_object(Bucket=s3_bucket, Key=s3_object)

In [13]:
# Download the churn CSV from S3 and parse the response body into a DataFrame.
data = get_s3_object(
    s3_bucket="andrey-makeschooldata",
    s3_object="Churn_Modelling.csv",
)
df = pd.read_csv(data['Body'])

In [15]:
# Preview the first five rows.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Importing data from a SQL database

### Set up the database

In [16]:
import sqlite3 as lite

# Open the SQLite database file 'population.db' (path is relative to the
# kernel's working directory).
con = lite.connect(database='population.db')

In [17]:
# Create and fill the Population table inside a single transaction
# (the connection context manager commits on success, rolls back on error).
with con:
    cur = con.cursor()
    # population.db lives on disk, so the table survives kernel restarts.
    # Dropping it first lets this cell be re-run (Restart & Run All) without
    # failing with "table Population already exists" or piling up extra rows.
    cur.execute("DROP TABLE IF EXISTS Population")
    cur.execute("CREATE TABLE Population(id INTEGER PRIMARY KEY, country TEXT, population INT)")
    # NULL for the INTEGER PRIMARY KEY column lets SQLite assign the id.
    cur.executemany(
        "INSERT INTO Population VALUES(NULL, ?, ?)",
        [
            ('Germany', 81197537),
            ('France', 66415161),
            ('Spain', 46439864),
            ('Italy', 60795612),
            # NOTE(review): 'Spain' is inserted a second time, exactly as in
            # the original cell; kept so row ids stay the same. Looks like a
            # copy-paste duplicate -- confirm and remove if unintended.
            ('Spain', 46439864),
        ],
    )

### Read from Database

In [21]:
# Names of the countries whose population exceeds 50 million.
query1 = "SELECT country FROM Population WHERE population > 50000000;"
df1 = pd.read_sql_query(sql=query1, con=con)
df1.head()

Unnamed: 0,country
0,Germany
1,France
2,Italy


In [22]:
# Same population filter as the previous query, but returning every column.
query2 = "SELECT * FROM Population WHERE population > 50000000;"
df2 = pd.read_sql_query(sql=query2, con=con)
df2.head()

Unnamed: 0,id,country,population
0,1,Germany,81197537
1,2,France,66415161
2,4,Italy,60795612


## Importing data from MongoDB

### Set up the MongoDB database and insert a document

In [25]:
from datetime import datetime, timezone
from pymongo import MongoClient

# No connection arguments are given, so MongoClient uses its defaults.
client = MongoClient()

# Handles for the 'dstutorial' database and its 'articles' collection.
db = client['dstutorial']
articles = db['articles']

doc = {
    "title": "An article about MongoDB and Python",
    "author": "Marco",
    # datetime.utcnow() is deprecated and yields a naive datetime; an explicit
    # timezone-aware UTC timestamp denotes the same instant unambiguously.
    "publication_date": datetime.now(timezone.utc),
}

# NOTE: each run of this cell inserts another copy of the document.
doc_id = articles.insert_one(doc).inserted_id

print(f"Inserted doc with id: {doc_id}")

Inserted doc with id: 5f501b1d53907fb89f4f834e


### Reading data from the MongoDB database

In [27]:
# Materialize every document returned by find() and load them into a DataFrame.
# Note: this rebinds the name `df`.
df = pd.DataFrame(data=list(articles.find()))

Unnamed: 0,_id,title,author,publication_date
0,5f501b1d53907fb89f4f834e,An article about MongoDB and Python,Marco,2020-09-02 22:22:21.874
