<a href="https://colab.research.google.com/github/MingzheHu-Duke/Data-Science-Tutorials/blob/main/Pandas/PandasTutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

**Pandas Series**

In [None]:
a = [1, 7, 2]

my_var = pd.Series(a)

print(my_var)

0    1
1    7
2    2
dtype: int64


In [None]:
print(my_var[0])

1


In [None]:
# Create labels
a = [1, 7, 2]

my_var = pd.Series(a, index=["x", "y", "z"])

print(my_var)

x    1
y    7
z    2
dtype: int64


In [None]:
# Access an item by referringr to the label
print(my_var["y"])

7


In [None]:
# Key/Value Objects as Series
calories = {"day1":1000, "day2":200, "day3":500}

my_var = pd.Series(calories)

print(my_var)

day1    1000
day2     200
day3     500
dtype: int64


In [None]:
# Select only some of the items in the dictionary
calories = {"day1":1000, "day2":200, "day3":500}

my_var = pd.Series(calories, index=["day1", "day2"])

print(my_var)

day1    1000
day2     200
dtype: int64


In [None]:
# Create an empty Series
s = pd.Series()
print(s)

Series([], dtype: float64)


  


In [None]:
# Create a Series from ndarray
import numpy as np
data = np.array(["a", "b", "c", "d"])
s = pd.Series(data)
print(s)

0    a
1    b
2    c
3    d
dtype: object


**DataFrames**

In [None]:
data = {
    "calories": [40, 300, 189],
    "durations": [400, 100, 22]
}

my_var = pd.DataFrame(data)

print(my_var)

   calories  durations
0        40        400
1       300        100
2       189         22


In [None]:
# Locate Row
df = my_var

print(df.loc[0])

calories      40
durations    400
Name: 0, dtype: int64


In [None]:
# Use list of indexes
print(df.loc[[0, 1]])

   calories  durations
0        40        400
1       300        100


In [None]:
# Named indexes
df = pd.DataFrame(data, index=["day1", "day2", "day3"])

print(df)

      calories  durations
day1        40        400
day2       300        100
day3       189         22


In [None]:
# locate Named Indexes
print(df.loc["day2"])

calories     300
durations    100
Name: day2, dtype: int64


**Load Files into a DataFrame**

In [None]:
df.to_csv("data.csv", index=True)

In [None]:
!test -f data.csv && echo "$FILE saved csv file exists!"

 saved csv file exists!


In [None]:
df = pd.read_csv("data.csv")

print(df)

  Unnamed: 0  calories  durations
0       day1        40        400
1       day2       300        100
2       day3       189         22


# DataFrame

In [2]:
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [3]:
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data)

print(df)

   0
0  1
1  2
2  3
3  4
4  5


In [4]:
data = [["A", 10], ["B", 12], ["C", 13]]
df = pd.DataFrame(data, columns=["Name", "Age"])
print(df)

  Name  Age
0    A   10
1    B   12
2    C   13


In [6]:
data = [["A", 10], ["B", 12], ["C", 13]]
df = pd.DataFrame(data, columns=["Name", "Age"], dtype=float)
print(df)
print(df.dtypes)

  Name   Age
0    A  10.0
1    B  12.0
2    C  13.0
Name     object
Age     float64
dtype: object


In [8]:
data = {"Name": ["Tom", "Jack", "Steve", "Ricky"], "Age":[23, 24, 25, 32]}
df = pd.DataFrame(data)

print(df)

    Name  Age
0    Tom   23
1   Jack   24
2  Steve   25
3  Ricky   32


In [10]:
data = {'Name':['Tom', 'Jack', 'Steve', 'Ricky'],'Age':[28,34,29,42]}
df = pd.DataFrame(data, index=['rank1','rank2','rank3','rank4'])
print(df)

        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  Steve   29
rank4  Ricky   42


In [13]:
data = [{"a":1, "b":2}, {"a":5, "b":10, "c":20}]

df = pd.DataFrame(data, index=["first", "Second"], columns=["a", "b1"])

print(df)

        a  b1
first   1 NaN
Second  5 NaN


**Column Selection**

In [15]:
d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']),
   'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(d)

print(df["one"])

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64


In [16]:
print("Adding a new column by passing as Series:")
df["three"] = pd.Series([10, 20, 30], index=["a", "b", "c"])
print(df)
df["four"] = df["one"] + df["three"]
print(df)

Adding a new column by passing as Series:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN
   one  two  three  four
a  1.0    1   10.0  11.0
b  2.0    2   20.0  22.0
c  3.0    3   30.0  33.0
d  NaN    4    NaN   NaN


In [17]:
# Column Deletion
del(df["one"])

In [18]:
print(df)

   two  three  four
a    1   10.0  11.0
b    2   20.0  22.0
c    3   30.0  33.0
d    4    NaN   NaN


In [19]:
df.pop("two")
print(df)

   three  four
a   10.0  11.0
b   20.0  22.0
c   30.0  33.0
d    NaN   NaN


In [22]:
d = {'one' : pd.Series([1, 2, 3], index=['a', 'b', 'c']), 
   'two' : pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(d)

# Row Selection By Label
print(df.loc["b"])

one    2.0
two    2.0
Name: b, dtype: float64


In [23]:
# Selection by integer location
print(df.iloc[2])

one    3.0
two    3.0
Name: c, dtype: float64


In [28]:
# Slice row
print(df[2:4])

   one  two
c  3.0    3
d  NaN    4


In [29]:
# Addition of Row
df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns = ['a','b'])

df = df.append(df2)
print(df)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8


In [30]:
# Delection of Rows
df = df.drop(0)
print(df)

   a  b
1  3  4
1  7  8
