In [3]:
# Importing Pandas
# Pandas is a tool for working with database-style data
# Although it will not look the same, Pandas data presents similarly to an Excel worksheet or a SQL table
# Rows and columns, with values in each row and column (unless they're null)
import pandas as pd
# pd, while not mandatory, is the universal convention for the Pandas alias
# Pandas is far more important to data analysts than the foundational programming concepts covered earlier

In [5]:
# Pandas Series
# Create a regular Python list (three strings and one integer)
x = ["Excel", "SQL", "Tableau", 4]
print(x)

['Excel', 'SQL', 'Tableau', 4]


In [6]:
# Convert our list to a Pandas Series
# The list mixes strings with an integer, so the Series is reported with the generic dtype "object"
x = pd.Series(x)
# The value of the last expression in a cell is displayed automatically
x

0      Excel
1        SQL
2    Tableau
3          4
dtype: object

In [37]:
# We can see that a Series is like a single column in Excel or SQL
# Like regular lists, values can be indexed and sliced
# x[0] looks up the index label 0; with the default 0, 1, 2, ... index that is also the first element
# (x.iloc[0] is the explicit way to ask for "the first element" regardless of the index labels)
print(x[0])
# The slice 1:3 returns the elements at positions 1 and 2; as with lists, the end of the slice is excluded
print(x[1:3])

Excel
1        SQL
2    Tableau
dtype: object


In [38]:
# The default index counts up from 0, but custom labels can be supplied instead
# Here each subject is labelled with a month
x = pd.Series(
    data=["Excel", "SQL", "Tableau", "AWS"],
    index=["October", "November", "December", "January"],
)
print(x)

October       Excel
November        SQL
December    Tableau
January         AWS
dtype: object


In [39]:
# Now we can access elements by their index label instead of by integer position
print(x["October"])

Excel


In [4]:
# Dictionaries to Series
# The dictionary keys become the index labels and the dictionary values become the data
subjects_dict = {
    "October": "Excel",
    "November": "SQL",
    "December": "Tableau",
    "January": "AWS",
}
x = pd.Series(subjects_dict)
print(x)
# Also print the source dictionary so it can be compared with the Series;
# the recorded output of this cell includes this line
print(subjects_dict)

October       Excel
November        SQL
December    Tableau
January         AWS
dtype: object
{'October': 'Excel', 'November': 'SQL', 'December': 'Tableau', 'January': 'AWS'}


In [5]:
# Pandas DataFrames from Scratch
# A DataFrame is laid out like an Excel worksheet or a SQL table
# You will rarely build one by hand, but it is worth knowing how
# Start with a dictionary: each key is a column name, and each value is a
# list holding that column's entries (3 columns x 3 rows here)
stocks = dict(
    name=["Apple", "Microsoft", "Google"],
    ticker=["AAPL", "MSFT", "GOOG"],
    price=[200, 300, 100],
)
print(stocks)
# Look up the "name" list, then take its first element
print(stocks["name"][0])

{'name': ['Apple', 'Microsoft', 'Google'], 'ticker': ['AAPL', 'MSFT', 'GOOG'], 'price': [200, 300, 100]}
Apple


In [43]:
# Making the dictionary into a DataFrame is pretty easy:
# each key becomes a column name and each list becomes that column's values
stocks_df = pd.DataFrame(stocks)
stocks_df
# Basic slicing/indexing: select a column by name, then a row by its index label
# (uncomment to try it; with the data above this prints Microsoft)
# print(stocks_df['name'][1])

Unnamed: 0,name,ticker,price
0,Apple,AAPL,200
1,Microsoft,MSFT,300
2,Google,GOOG,100


In [46]:
# Pandas DataFrames via Import
# Most of the time as a data analyst you will import DataFrames from existing files
# Let's practice that
# The path is relative, so cars.csv must sit in the notebook's working directory
cars = pd.read_csv("cars.csv")
# In a notebook, the value of a cell's last expression is displayed automatically,
# so print() is not needed to view a DataFrame
# NOTE: the column of car names shows up as "Unnamed: 0", presumably because its header is blank in the file
cars

Unnamed: 0.1,Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
5,Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
6,Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
7,Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
8,Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
9,Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [45]:
# We can do almost the same thing with JSON files
# JSON is outside the scope of this course, but it is another useful file format, most often used for web data
# As with the CSV above, the relative path means iris.json must be in the working directory
iris = pd.read_json("iris.json")
iris

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [48]:
# When displayed, it is clear that this resembles Excel/SQL data
# However, we have the superior power and flexibility of Python to better manipulate it
# Just look at one column: single brackets return that column as a Series
# cars["hp"]
# Note the difference between that and double brackets, which take a list of
# column names and return a DataFrame, even when only one column is listed
cars[["hp"]]

Unnamed: 0,hp
0,110
1,110
2,93
3,110
4,175
5,105
6,245
7,62
8,95
9,123


In [50]:
# Summary statistics for the hp column, one per line:
# sum, min, max, then mean
print(
    cars["hp"].sum(),
    cars["hp"].min(),
    cars["hp"].max(),
    cars["hp"].mean(),
    sep="\n",
)

4694
52
335
146.6875
