# What is pandas?

In [2]:
import pandas as pd  # `pd` is the alias every later cell uses
# The package docstring is a short summary of pandas and its main objects.
print(pd.__doc__)


pandas - a powerful data analysis and manipulation library for Python

See http://pandas.pydata.org/ for full documentation. Otherwise, see the
docstrings of the various objects in the pandas namespace:

Series
DataFrame
Panel
Index
DatetimeIndex
HDFStore
bdate_range
date_range
read_csv
read_fwf
read_table
ols



# The most important thing: DataFrames

In [31]:
# Each inner list is one row of the table: [name, age].
namesAges = [
    ["Sally", 5],
    ["Joe", 10],
    ["John", 7],
    ["Kelly", 8],
]

# `data` supplies the rows and `columns` labels each position within a row.
# No index is passed, so the rows get the default labels 0..3.
df = pd.DataFrame(
    data=namesAges,
    columns=["Names", "Ages"],
)

In [8]:
# A bare expression on the last line of a cell is rendered as the cell's output.
df

Unnamed: 0,Names,Ages
0,Sally,5
1,Joe,10
2,John,7
3,Kelly,8


# Looking inside the DataFrame

## Rows and columns

In [21]:
# The column labels, in order.
df.columns

Index(['Names', 'Ages'], dtype='object')

In [22]:
# The row labels; a RangeIndex here because no explicit index was given.
df.index

RangeIndex(start=0, stop=4, step=1)

## Iterating

In [9]:
# Iterating over a DataFrame yields its column labels, not its rows.
for column in df:
    print(column)

Names
Ages


## Indexing

In [11]:
# Square brackets with a column label select that one column.
df["Names"]

0    Sally
1      Joe
2     John
3    Kelly
Name: Names, dtype: object

In [12]:
# Same lookup for the numeric column; the output reports an integer dtype.
df["Ages"]

0     5
1    10
2     7
3     8
Name: Ages, dtype: int64

## df.loc[row, col]

Label-based access: rows are selected by their label in ```df.index``` and columns by their label in ```df.columns```.

In [13]:
# A single label selects the whole row whose index label is 1.
df.loc[1]

Names    Joe
Ages      10
Name: 1, dtype: object

In [14]:
# The row whose index label is 2.
df.loc[2]

Names    John
Ages        7
Name: 2, dtype: object

In [15]:
# Row label plus column label picks out a single cell.
df.loc[2, "Names"]

'John'

In [16]:
# Same row, the other column.
df.loc[2, "Ages"]

7

## df.iloc[rowInt, columnInt]

Same as ```loc```, but rows and columns are both selected by integer position (starting at 0) instead of by label.

In [19]:
# Row position 2, column position 0 (the "Names" column).
df.iloc[2, 0]

'John'

In [20]:
# Row position 2, column position 1 (the "Ages" column).
df.iloc[2, 1]

7

## df.shape and size

In [42]:
# A tuple of (number of rows, number of columns).
df.shape

(4, 2)

In [43]:
# Total number of cells, i.e. rows * columns.
df.size

8

# Changing values in a pd.DataFrame

In [32]:
# The value at row label 2, column "Ages", before it is modified.
df.loc[2, "Ages"]

7

In [33]:
# Read the current cell value, add 10, and write it back in place.
# Not idempotent: every re-run of this cell adds another 10.
df.loc[2, "Ages"] = df.loc[2, "Ages"] + 10

In [34]:
# Display the frame again to see the updated cell.
df

Unnamed: 0,Names,Ages
0,Sally,5
1,Joe,10
2,John,17
3,Kelly,8


In [40]:
# Add a new column in place at column position 2, i.e. right after "Ages".
# One value per existing row. Re-running this cell raises a ValueError because
# the column already exists.
df.insert(
    loc=2,
    column="Favorite Class",
    value=["Math", "Music", "Science", "Geography"],
)

In [41]:
# Display the frame again to see the inserted column.
df

Unnamed: 0,Names,Ages,Favorite Class
0,Sally,5,Math
1,Joe,10,Music
2,John,17,Science
3,Kelly,8,Geography


## Saving as csv

In [25]:
# Write the frame to NamesAndAges.csv in the current working directory.
# With the default index=True the row labels are saved as an extra first column.
df.to_csv("NamesAndAges.csv")

In [26]:
import os
# List the working directory to confirm that NamesAndAges.csv was written.
os.listdir('.')  # Can you find it in your current directory?

['.ipynb_checkpoints',
 'Introduction to Numpy.ipynb',
 'NamesAndAges.csv',
 'pandas.ipynb']

# Opening csvs!

In [44]:
# A bare file name is resolved relative to the current working directory,
# so boston.csv must sit in the same folder as this notebook.
df2 = pd.read_csv("boston.csv")
# Preview only the top of the table instead of rendering the whole frame.
df2.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PT,B,LSTAT,MV
0,0.00632,18.0,2.31,0,0.538,6.575,65.199997,4.09,1,296,15.3,396.899994,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.900002,4.9671,2,242,17.799999,396.899994,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.099998,4.9671,2,242,17.799999,392.829987,4.03,34.700001
3,0.03237,0.0,2.18,0,0.458,6.998,45.799999,6.0622,3,222,18.700001,394.630005,2.94,33.400002
4,0.06905,0.0,2.18,0,0.458,7.147,54.200001,6.0622,3,222,18.700001,396.899994,5.33,36.200001


More about this dataset if you're curious:

https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html


The ```df.head()``` method returns the first 5 rows of the DataFrame by default, so we can have a look at the dataset without having to scroll very much.

In [45]:
# (number of rows, number of columns) of the loaded dataset.
df2.shape

(506, 14)

Learn more about using pandas at the link below! It's a 10-minute tutorial!

https://pandas.pydata.org/pandas-docs/stable/10min.html
