In [None]:
import pandas as pd

In [None]:
import numpy as np

#1 Reading a CSV

In [None]:
# Location of the CSV file to load.
file = "file.csv"

# Parse the file into a DataFrame and echo it as plain text.
df = pd.read_csv(filepath_or_buffer = file)
print(df)

In [None]:
# Display the Python type of the object bound to `df`.
type(df)

#2 Storing a DataFrame to a CSV

In [None]:
# Write the frame as "|"-delimited text, leaving out the index column.
# NOTE(review): this overwrites the same "file.csv" that the read cell loads
# with the default separator, so re-running the notebook may parse the file
# differently — consider writing to a separate output path.
df.to_csv("file.csv", sep = "|", index = False)

In [None]:
# Shell escape: print the raw contents of file.csv (requires a `cat` command on the host).
!cat file.csv

#3–4 Creating a DataFrame

### From a list of lists

In [None]:
# Row-oriented input: every inner list is one record.
data = [
    [1, 2, "A"],
    [3, 4, "B"],
]

# Column labels are passed separately from the rows.
df = pd.DataFrame(data = data, columns = ["col1", "col2", "col3"])
print(df)

### From a Dictionary

In [None]:
# Column-oriented input: each key names a column, each value holds its data.
data = dict(col1 = [1, 2], col2 = [3, 4], col3 = ["A", "B"])

df = pd.DataFrame(data)
print(df)

#5 The Shape of the DataFrame

In [None]:
# Show the frame, then its dimensions.
print(df)

# .shape is a (number of rows, number of columns) tuple.
print("Shape:", df.shape)

#6 Viewing Top N Rows

In [None]:
import string
# The uppercase ASCII letters "A".."Z"; the next cell indexes into this
# string to label its generated rows.
l = string.ascii_uppercase

In [None]:
# Ten records: consecutive (odd, even) number pairs 1-2, 3-4, ..., 19-20,
# each tagged with the matching letter from `l`.
data = [[2 * k + 1, 2 * k + 2, l[k]] for k in range(10)]

df = pd.DataFrame(data = data, columns = ["col1", "col2", "col3"])

In [None]:
# Print only the first five rows of the frame.
print(df.head(5))

#7 Printing the Datatype of columns

In [None]:
# Display the dtype of every column.
df.dtypes

#8 Modifying the Datatype of a column

In [None]:
# Cast col1 to 8-bit integers, then list the dtypes to confirm the change.
df["col1"] = df["col1"].astype("int8")

print(df.dtypes)

#9–10 Printing Descriptive Info about the DataFrame

### Method 1

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

### Method 2

In [None]:
# Print descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns.
print(df.describe())

#11 Filling NaN values

In [None]:
# Two-row frame whose second col1 entry is missing (NaN).
df = pd.DataFrame({"col1": [1, np.nan],
                   "col2": [2, 4],
                   "col3": ["A", "B"]})
print(df)

In [None]:
# Replace every NaN with 0. The result is reassigned rather than mutated with
# inplace=True, so the step stays chainable and the previously displayed
# object is not modified behind the reader's back.
df = df.fillna(0)
print(df)

#12 Joining DataFrames

In [None]:
# Both join inputs are the very same frame (two names for one object).
df1 = df2 = df

print(df1)
print(df2)

In [None]:
# Join the two frames on their shared col3 key.
df1.merge(df2, on = "col3")

#13 Sorting a DataFrame

In [None]:
df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "B"]})

# Rows ordered by ascending col1; `df` itself is left unsorted.
print(df.sort_values(by = "col1"))

In [None]:
# Same ascending sort on col1, shown through the notebook's rich display.
df.sort_values(by = "col1")

#14 Grouping a DataFrame

In [None]:
df = pd.DataFrame(
    data = [[1, 2, "A"], [5, 8, "B"], [3, 10, "B"]],
    columns = ["col1", "col2", "col3"],
)

# One row per col3 value: the sum of col1 and the maximum of col2.
aggregations = {"col1": "sum", "col2": "max"}
df.groupby(by = "col3").agg(aggregations)

#15 Renaming Column(s)

In [None]:
df = pd.DataFrame({"col_A": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "B"]})

# Display a copy with col_A relabelled as col1; `df` keeps its original labels.
df.rename({"col_A": "col1"}, axis = "columns")

#16 Deleting Column(s)

In [None]:
df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "B"]})

# Print a copy without col1; `df` itself still has all three columns.
print(df.drop(labels = ["col1"], axis = "columns"))

#17 Adding New Column(s)

### Method 1

In [None]:
df = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})

# New column via item assignment: the element-wise sum of col1 and col2.
df["col3"] = df["col1"].add(df["col2"])
print(df)

### Method 2

In [None]:
df = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})

# assign() returns a new frame with col3 added; rebind `df` to that result.
df = df.assign(col3 = lambda frame: frame["col1"] + frame["col2"])

print(df)

#18–21 Filtering a DataFrame

### Method 1: Boolean Filtering

In [None]:
df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "B"]})

# Keep only the rows whose col2 value exceeds 5.
print(df[df["col2"].gt(5)])

In [None]:
df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "C"]})

# Keep the rows whose col3 value is one of the listed labels.
filter_list = ["A", "C"]
print(df.loc[df["col3"].isin(filter_list)])

### Method 2: Getting a Column

In [None]:
# Select a single column by name; the attribute form df.col1 is an alternative.
df["col1"] ## or df.col1

### Method 3: Selecting by Label

In [None]:
# Marks table: one row per student (the index), one column per subject.
df = pd.DataFrame({"Maths": [6, 5, 3],
                   "Science": [5, 8, 10],
                   "English": [10, 6, 4]},
                  index = ["John", "Mark", "Peter"])

print(df)

In [None]:
# Label-based row lookup: the row whose index label is "John".
df.loc["John"]

In [None]:
# Label-based lookup of one row restricted to a list of columns.
df.loc["Mark", ["Maths", "English"]]

In [None]:
# .loc looks rows up by label, not by position. The frame above is indexed by
# student names, so the integer label 0 does not exist and .loc raises
# KeyError. Catch and print it so the failure is demonstrated without
# aborting a top-to-bottom run of the notebook.
try:
    df.loc[0]
except KeyError as err:
    print("KeyError:", err)

### Method 4: Selecting by Position

In [None]:
# Position-based lookup: the first row, regardless of its index label.
df.iloc[0]

#22–23 Finding Unique Values in a DataFrame

In [None]:
df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 10],
                   "col3": ["A", "B", "A"]})

# Distinct values of col3, in order of first appearance.
df.col3.unique()

In [None]:
# Count of distinct values in col3.
df["col3"].nunique()

#24 Applying a Function to a DataFrame

In [None]:
def add_cols(row):
    """Return the sum of the ``col1`` and ``col2`` entries of one row."""
    return row["col1"] + row["col2"]

df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 9]})

# axis=1 hands each row to add_cols; the results become the new col3.
df["col3"] = df.apply(add_cols, axis = "columns")
print(df)

In [None]:
def square_col(num):
    """Return ``num`` raised to the power of two."""
    return pow(num, 2)

df = pd.DataFrame({"col1": [1, 5, 3],
                   "col2": [2, 8, 9]})

# Series.apply calls square_col once per element of col1.
df["col3"] = df["col1"].apply(square_col)
print(df)

#25–26 Handling Duplicates

In [None]:
df = pd.DataFrame({"col1": [1, 2, 1],
                   "col2": ["A", "B", "A"]})

# keep=False flags every member of a duplicated group, not only the repeats.
df.duplicated(keep = False)

In [None]:
df = pd.DataFrame(
    data = [[1, "A"], [2, "B"], [1, "A"]],
    columns = ["col1", "col2"],
)

# Print the frame with repeated rows removed (first occurrence kept).
deduplicated = df.drop_duplicates()
print(deduplicated)

#27 Finding the Distribution of Values

In [None]:
df = pd.DataFrame({"col1": [1, 2, 1],
                   "col2": ["A", "B", "A"]})

# Frequency of each distinct col2 value.
print(df.value_counts(subset = "col2"))

#28 Resetting the Index of a DataFrame

In [None]:
# Frame with a deliberately unordered integer index.
df = pd.DataFrame({"col1": [6, 5, 3],
                   "col2": [5, 8, 10],
                   "col3": [10, 6, 4]},
                  index = [2, 3, 1])

# reset_index() moves the old index into a regular column and renumbers rows from 0.
print(df.reset_index())

In [None]:
# drop=True discards the old index instead of keeping it as a column.
df.reset_index(drop=True)

#29 Finding Cross-tabulation

In [None]:
df = pd.DataFrame({"col1": ["A", "B", "C", "A"],
                   "col2": ["X", "Y", "X", "X"]})

# Count how often each (col1, col2) pair occurs: col1 values as rows, col2 values as columns.
print(pd.crosstab(index = df["col1"], columns = df["col2"]))

In [None]:
df = pd.DataFrame(
    data = [["A", "X"], ["B", "Y"], ["C", "X"], ["A", "X"]],
    columns = ["col1", "col2"],
)

# Frequency table of col1 (rows) against col2 (columns).
frequency_table = pd.crosstab(df["col1"], df["col2"])
print(frequency_table)

#30 Pivoting DataFrames

In [None]:
# Long-format marks table: one row per (student, subject) pair.
# It supplies the "Name", "Subject" and "Marks" columns that the
# pd.pivot_table call in the next cell reads.
df = pd.DataFrame([["John",  "Maths",   6],
                   ["John",  "Science", 5],
                   ["John",  "English", 10],
                   ["Mark",  "Maths",   5],
                   ["Mark",  "Science", 8],
                   ["Mark",  "English", 6],
                   ["Peter", "Maths",   3],
                   ["Peter", "Science", 10],
                   ["Peter", "English", 4]],
                  columns = ["Name", "Subject", "Marks"])

print(df)

In [None]:
# Reshape long -> wide: one row per Name, one column per Subject, cell values
# taken from Marks; combinations with no data are filled with 0.
# NOTE(review): requires `df` to have "Name", "Subject" and "Marks" columns —
# make sure the frame set up before this cell provides them.
pd.pivot_table(df, 
               index = ["Name"],
               columns=["Subject"], 
               values='Marks',
               fill_value=0)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=1294e1ca-ac34-455b-bc8e-6b42c780bab8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>