## Pandas Basics

In [2]:
import pandas as pd
import numpy as np
import statistics as st

## creating DataFrame

In [None]:
# row wise - from list
d = [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]]

df2 = pd.DataFrame(
    d, 
    index=['1', '2', '3'], 
    columns=['a', 'b', 'c'])   # column names

print(df2)

   a  b  c
1  1  2  3
2  4  5  6
3  7  8  9


In [None]:
# column wise
df = pd.DataFrame(
{
 "a" : [1 ,4, 7],       # col a
 "b" : [2, 5, 8],       # col b
 "c" : [3, 6, 9],
 "d" : [10, 11, 12],
 },
index = ["x", "y", "z"])      # row index

print(df)

   a  b  c   d
x  1  2  3  10
y  4  5  6  11
z  7  8  9  12


In [55]:
df["a"] # select column by name
df[["a", "c"]] # select columns by name
df[0:2] # select rows by index
df[["a", "c"]][1:] # select col and then slice rows

Unnamed: 0,a,c
y,4,6
z,7,9


In [None]:
# loc[] selects by name
df.loc["x"] # select row by name
df.loc[:"y"] # select rows by name sliced
df.loc[::2, ["a", "c"]]  # indexing rows  with steps and columns by name

In [None]:
# iloc[] selects by index
df.index
df.iloc[2] # select row 3 by index
df.iloc[:, -1] # select last column by index
df.iloc[::2, ::2] # indexing rows & columns with steps

## select columns

In [None]:
df.a # column a
df['a'] # column a
df.loc[:, 'a'] # all rows of column a

ape      1.0
human    NaN
alien    7.0
Name: a, dtype: float64

In [None]:
df.loc[: , ['a', 'b']] # all rows of column a & b
df[['a', 'b']] # col a&b

Unnamed: 0,a,b
ape,1.0,2
human,,5
alien,7.0,8


## select rows       

In [None]:
df.loc[['ape','alien']] # multiple rows in double brackets
df.loc['ape'] # by index

a    1.0
b    2.0
c    NaN
Name: ape, dtype: float64

## access by row and column

In [None]:
df = pd.DataFrame(np.arange(1,10).reshape(3,3), columns=['a', 'b', 'c'])
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [None]:
df.loc[[1, 2], "b"] # multiple rows same column
df.loc[[1, 2], ['a', 'c']]  # multiple rows multiple columns

df.loc[1, 'b'] # access value by row and column
df.loc[1, ["a", "b"]] # row  1 / col a & c

a    4
b    5
Name: 1, dtype: int64

## iloc() - select by (internal) index / position 

In [None]:
# row with index position 0
# indexing starts from zero 
df1.iloc[0]
df1.iloc[1]

a    4
b    5
c    6
Name: 2, dtype: int64

In [None]:
df1.iloc[:, 1] # column by index position

1    2.0
2    NaN
3    8.0
Name: b, dtype: float64

In [None]:
# Select data at the specified row and column location
df1.iloc[0, 0]

1

In [None]:
# Select list of rows and columns by position
df1.iloc[[1, 2], [0, 1]]

Unnamed: 0,a,b
2,4,
3,7,8.0


## slicing

In [None]:
df1.loc[1:2, :] # get rows - from : to & all columns
df1.loc[:, "b":"c"] # get all row from col 'b' & 'c'
df1.loc[1:3, "b":"c"]  # get rows from : to, of columns from:to
df1

Unnamed: 0,a,b,c
1,1,2.0,3.0
2,4,,
3,7,8.0,9.0


In [None]:
df1.iloc[0:3, :]  #  Slicing Rows and Columns by position
df1.iloc[:, 0:3]  # slice columns by index position.
df1.iloc[0:2, 1:3]  # slice row and columns by index position.
df1.iloc[:2, :2]  # row index 0 to index 1 (exclusive 2), column zero to one

Unnamed: 0,a,b
1,1,2.0
2,4,


## access cell - at[] / iat[]

In [None]:
df = pd.DataFrame(np.arange(1, 10).reshape(3, 3), columns=['a', 'b', 'c'], index=['ape', 'human', 'alien'])
df.iat[1, 0] = np.nan  # set value using row and column integer positions
df.at['ape' , 'c'] = np.nan  # set value using row and column labels
df

Unnamed: 0,a,b,c
ape,1.0,2,
human,,5,6.0
alien,7.0,8,9.0


## import data

In [None]:
pd.read_csv(filename) # Reads a CSV file
pd.read_table(filename) # Reads a delimited text file (like TSV)
pd.read_excel(filename) # Reads an Excel file
pd.read_sql(query, connection_object) # Reads a SQL table/database
pd.read_json(json_string) # Reads a JSON formatted string, URL or file
pd.read_html(url) # Parses an html, string or file and extracts tables to a list of df
pd.read_clipboard() # Reads the contents of your clipboard
pd.DataFrame(dict) # Reads from a dict; keys for columns names, values for data as lists

In [59]:
df = pd.read_csv("https://drive.google.com/uc?id=1oE-3rt17bFW7fOzDIjwFSEMPTIV3NvcO")
df

Unnamed: 0,school_id,grade,class,student_id,sex,nationality,grade_math_t1,grade_language_t1,grade_science_t1,grade_math_t2,grade_language_t2,grade_science_t2,treatment,date_of_birth
0,57,6,A,wvqgd@cunb.edu,1,1,2.046285,2.369783,2.711344,1.175309,1.498807,1.840368,1,1997-07-27
1,57,6,A,j0ihe@cunb.edu,1,2,7.859077,7.206984,6.994455,7.548812,6.896719,6.684190,1,1997-06-24
2,57,6,A,wcjgk@cunb.edu,2,1,7.118976,8.057449,8.573210,6.181019,7.119492,7.635253,1,1997-04-23
3,57,6,A,mzmqb@cunb.edu,1,1,6.973737,7.388008,6.628448,7.593102,8.007373,7.247812,1,1997-02-24
4,57,6,A,s6n0y@cunb.edu,1,1,6.574877,6.773626,7.844316,7.611065,7.809814,8.880504,1,1996-09-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1505,946,8,D,jj36e@cunb.edu,1,1,4.938375,5.306647,4.985917,6.162418,6.530690,6.209960,0,1995-07-04
1506,946,8,D,ca1dg@cunb.edu,1,1,7.661931,8.360153,7.937165,8.179364,8.877586,8.454598,0,1995-08-23
1507,946,8,D,amdrx@cunb.edu,1,1,9.248758,8.534791,8.056641,8.773601,8.059634,7.581484,0,1994-12-15
1508,946,8,D,yn5ug@cunb.edu,1,2,5.689228,5.071503,5.338547,6.382995,5.765270,6.032315,0,1994-09-18


## export 

In [None]:
df.to_csv(filename) # Writes to a CSV file
df.to_excel(filename) # Writes to an Excel file
df.to_sql(table_name, connection_object) # Writes to a SQL table
df.to_json(filename) # Writes to a file in JSON format
df.to_html(filename) # Writes to an HTML table
df.to_clipboard() # Writes to the clipboard