***This cheatsheet is prepared for quick reference and easy lookup of pandas and related tools for importing data. Individual requirements always differ according to skill level. This cheatsheet is designed for common requirements and uses widely checked code. Jupyter Notebook version 5.7.4 is used.***

# Version Check

In [None]:
import numpy as np  # optional here; suggested because it is usually needed alongside pandas
import pandas as pd

# Report which pandas release this notebook is running against
pandas_version = pd.__version__
print(pandas_version)

In [None]:
# pandas also provides a utility function, pd.show_versions(), which reports
# the versions of pandas and of its dependencies.
# It writes the report itself and returns None, so it is called directly:
# wrapping it in print() would only append a stray "None" to the output.
import pandas as pd
pd.show_versions(as_json=True)

# Importing csv files

In [None]:
import numpy as np
import pandas as pd

# Assign the filename: file
file = '../_world/maritime_boundaries.csv'  # a URL or a local file path, given as a string

# Read the whole file into a DataFrame: df
df = pd.read_csv(file)

# Alternatively, read only the first 5 rows of the file: df
# (this rebinds df; header=None tells pandas the file has no header row,
# so the first line of the file is read as data)
df = pd.read_csv(file, nrows=5, header=None)

# Print the head of the DataFrame df.
# An explicit print() is required here: a notebook cell only echoes the value
# of its last expression, so a bare df.head() in the middle shows nothing.
print(df.head())

# Print the datatype of df
print(type(df))

# Importing txt files

In [None]:
# Name of the text file to read: filename
filename = 'taras_bulba.txt'

# Open the file exactly once: file
# mode='r' is to read, mode='w' is to write.
# The path may also be passed directly instead of going through `filename`:
#     file = open('taras_bulba.txt', mode='r')
# Use one form or the other; rebinding `file` to a second open() call would
# leave the first handle open with nothing left to close it.
file = open(filename, mode='r')

# Print it
print(file.read())

# Check whether file is closed (False while the handle is still open)
print(file.closed)

# Close file
file.close()

# Check whether file is closed (True after close())
print(file.closed)

In [None]:
# Print the entire text; the with-block closes the file on exit
with open('taras_bulba.txt') as file:
    contents = file.read()
    print(contents)

# Print only the first 3 lines, one readline() call per line
with open('taras_bulba.txt') as file:
    for _ in range(3):
        print(file.readline())

# Importing flat files using NumPy

In [None]:
import numpy as np

# Path of the comma-separated text file: file
file = 'computational_maths.txt'

# Parse the file with NumPy: df
# (despite the name, np.loadtxt returns a NumPy array, not a pandas DataFrame)
df = np.loadtxt(fname=file, delimiter=',')

# Show the loaded values
print(df)

# Working with mixed datatypes using NumPy

In [None]:
# np.genfromtxt() can handle datasets that have different datatypes in
# different columns; one column may contain strings and another floats.
# Passing dtype=None lets it figure out what type each column should be.

import numpy as np

# names=True takes the column names from the file's header line, which is
# what allows a column to be selected by name below.
data = np.genfromtxt('../_datasets/titanic.csv', delimiter=',', names=True, dtype=None)

# Print the shape of the loaded array
print(np.shape(data))

# Print the 'Fare' column
print(data['Fare'])

# Importing Excel Files

In [None]:
# Import pandas
import pandas as pd

# Assign spreadsheet filename: file
file = '../_datasets/battledeath.xlsx'

# Load spreadsheet: xl
xl = pd.ExcelFile(file)

# Parse the first sheet (index 0), skip its first row and rename the columns: df1
df1 = xl.parse(0, skiprows=[0], names=['Country', 'AAM due to War (2002)'])

# Print the head of the DataFrame df1
print(df1.head())

# Parse the first column of the second sheet (index 1) and rename the column: df2
# `usecols` is the current name of the column-selection keyword; the older
# `parse_cols` spelling is deprecated and rejected by recent pandas releases.
df2 = xl.parse(1, usecols=[0], skiprows=[0], names=['Country'])

# Print the head of the DataFrame df2
print(df2.head())

# Importing SAS files

In [None]:
import pandas as pd

# Import sas7bdat package
from sas7bdat import SAS7BDAT

# Open the SAS dataset and convert it to a DataFrame: df_sas
# (the with-block releases the reader once the conversion is done)
with SAS7BDAT('../_datasets/sales.sas7bdat') as file:
    df_sas = file.to_data_frame()

# Show the first rows of df_sas
sas_preview = df_sas.head()
print(sas_preview)

# Importing Stata files

In [None]:
import pandas as pd

# Read the Stata .dta file straight into a DataFrame: df
df = pd.read_stata('../_datasets/disarea.dta')

# Show the first rows of df
stata_preview = df.head()
print(stata_preview)


# Importing HDF5 files

In [None]:
import h5py
import numpy as np  # np.arange is used below; importing here keeps the cell self-contained

# Assign filename: file
file = '../_datasets/LIGO_data.hdf5'

# Open the HDF5 file read-only: data
data = h5py.File(file, 'r')

# Get the HDF5 group: group
group = data['strain']

# Check out keys of group
for key in group.keys():
    print(key)

# Set variable equal to time series data: strain
# Indexing the dataset with [()] reads its full contents; it replaces the
# Dataset.value attribute, which was deprecated and then removed in h5py 3.0.
strain = group['Strain'][()]

# Set number of time points to sample: num_samples
num_samples = 10000

# Set time vector: num_samples evenly spaced points covering [0, 1)
time = np.arange(0, 1, 1/num_samples)

# Import sheets from excel files

In [None]:
import pandas as pd

# Path of the workbook: file
file = '../_datasets/customer.xlsx'

# Open the workbook once so several sheets can be parsed from it: xl
xl = pd.ExcelFile(file)

# Select a sheet by its name: df1
df1 = xl.parse('2012')

# Show the first rows of df1
by_name_preview = df1.head()
print(by_name_preview)

# Select a sheet by its position (0 is the first sheet): df2
df2 = xl.parse(0)

# Show the first rows of df2
by_index_preview = df2.head()
print(by_index_preview)

# Import pickle file

In [None]:
import pickle

# Read the pickle file in binary mode and deserialize its contents: d
# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open('data.pkl', 'rb') as file:
    d = pickle.loads(file.read())

# Show the loaded object
print(d)

# Show what type of object was stored in the pickle
d_type = type(d)
print(d_type)

# Importing a .mat file

In [None]:
import scipy.io

# Read the MATLAB .mat file: mat
mat = scipy.io.loadmat('../_datasets/albeck_gene_expression.mat')

# Show what kind of object loadmat handed back
mat_type = type(mat)
print(mat_type)

# Importing flat files from the WEB

In [None]:
from urllib.request import urlretrieve

# Import pandas
import pandas as pd

# Remote location of the data file: url
url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'

# Download a local copy of the file
urlretrieve(url, filename='winequality-red.csv')

# Load the local copy (fields are separated by ';') into a DataFrame: df
df = pd.read_csv('winequality-red.csv', sep=';')

# Show the first rows of df
wine_preview = df.head()
print(wine_preview)

# Importing files with GET requests

In [None]:
import requests

# Page to fetch: url
url = "http://www.datacamp.com/teach/documentation"

# requests.get() builds the request, sends it and returns the response: r
r = requests.get(url)

# Body of the response decoded as text: text
text = r.text

# Show the HTML that came back
print(text)


# GET requests using urllib

In [None]:
from urllib.request import urlopen, Request

# Page to fetch: url
url = "http://www.wikipedia.org"

# Wrap the URL in a Request object: request
request = Request(url)

# Send the request and keep the response object: response
response = urlopen(request)

# Show what type of object urlopen returned
response_type = type(response)
print(response_type)

# Release the connection once the response is no longer needed
response.close()

# Getting Web Page as Data with BeautifulSoup

In [None]:
import requests
from bs4 import BeautifulSoup

# Specify url
url = 'https://www.python.org/~guido/'

# Package the request, send the request and catch the response: r
r = requests.get(url)

# Extract the response as html: html_doc
html_doc = r.text

# Create a BeautifulSoup object from the HTML: soup
# The parser is named explicitly ('html.parser' ships with Python). Without it,
# BeautifulSoup picks whichever parser happens to be installed and warns, so
# the same page can be parsed differently on different machines.
soup = BeautifulSoup(html_doc, 'html.parser')

# Print the title of Guido's webpage
print(soup.title)

# Find all 'a' tags (which define hyperlinks): a_tags
a_tags = soup.find_all('a')

# Print the URLs to the shell
for link in a_tags:
    print(link.get('href'))

In [None]:
import requests
from bs4 import BeautifulSoup

# Specify url: url
url = 'https://www.python.org/~guido/'

# Package the request, send the request and catch the response: r
r = requests.get(url)

# Extract the response as html: html_doc
html_doc = r.text

# Create a BeautifulSoup object from the HTML: soup
# The parser is named explicitly ('html.parser' ships with Python). Without it,
# BeautifulSoup picks whichever parser happens to be installed and warns, so
# the same page can be parsed differently on different machines.
soup = BeautifulSoup(html_doc, 'html.parser')

# Get the title of Guido's webpage: guido_title
guido_title = soup.title

# Print the title of Guido's webpage to the shell
print(guido_title)

# Get Guido's text: guido_text
guido_text = soup.get_text()

# Print Guido's text to the shell
print(guido_text)

# Loading JSONs in Python

In [None]:
import json

# Read the JSON document from disk and parse it into Python objects: json_data
with open('snakes.json', 'r') as json_file:
    json_data = json.loads(json_file.read())
# Last expression of the cell, so the notebook echoes the resulting type
type(json_data)


# Connecting to an API in Python

In [None]:
import requests
# Query string asks the API for the title 'hackers': url
# NOTE(review): the service may require an API key in the query - confirm
url = 'http://www.omdbapi.com/?t=hackers'

# Send the GET request and decode the JSON body into a dict: json_data
r = requests.get(url)
json_data = r.json()

# Print every field of the response as "name: value"
for field, content in json_data.items():
    print(field + ':', content)

# Using Tweepy

In [None]:
import tweepy, json

# Twitter API credentials (placeholders - replace "..." with your own values)
consumer_key = "..."
consumer_secret = "..."
access_token = "..."
access_token_secret = "..."

# Build the OAuth handler from the consumer pair, then attach the access pair: auth
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

In [None]:
import tweepy
import json

class MyStreamListener(tweepy.StreamListener):
    """Stream listener that saves the first 100 received tweets.

    Each status is written to ``tweets.txt`` as one JSON document per line
    and is also kept in memory in ``self.tweet_list``.
    """

    def __init__(self, api=None):
        """Set up the counter, the in-memory list and the output file.

        Args:
            api: optional API object, forwarded to the base listener.
        """
        # Forward `api` instead of silently dropping it.
        super(MyStreamListener, self).__init__(api)
        self.num_tweets = 0
        # Kept on the instance: on_status previously appended to a name
        # (`tweet_list`) that was never defined, failing on the first tweet.
        self.tweet_list = []
        self.file = open("tweets.txt", "w")

    def on_status(self, status):
        """Record one status.

        Args:
            status: status object whose ``_json`` attribute holds the raw tweet.

        Returns:
            True while fewer than 100 tweets have been recorded, otherwise
            False (after closing the output file).
        """
        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')
        self.tweet_list.append(status)
        self.num_tweets += 1
        if self.num_tweets < 100:
            return True
        # Close before returning: a close() placed after the return
        # statements is unreachable and leaves the file open.
        self.file.close()
        return False

In [None]:
# Build the listener that will receive the streamed tweets
l = MyStreamListener()

# Create your Stream object from the credentials and the listener
stream = tweepy.Stream(auth, l)

# Start streaming, capturing only tweets that match these keywords
keywords = ['clinton', 'trump', 'sanders', 'cruz']
stream.filter(track=keywords)
