# Complete Pandas Tutorial (2024 Updated Edition)

In [1]:
import pandas as pd

## Intro to DataFrames
A DataFrame is the primary data structure in Pandas, designed specifically for handling and manipulating tabular data in Python. It provides a straightforward and powerful way to work with data sets, allowing for easy manipulation and visualization of tables. This functionality makes DataFrames great for efficiently organizing, accessing, and analyzing data in Python.

In [2]:
# Each inner list becomes one row of the frame; `columns` supplies the
# header labels, so the result is a 4-row x 3-column table.
data = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
]
df = pd.DataFrame(data=data, columns=list('ABC'))

In [3]:
# Column-oriented construction: each dict key is a column name and each value
# is that column's 14 entries (two coffee types for each of the seven days).
data = {
    # Every weekday appears twice in a row, once per coffee type.
    'Day': [
        day
        for day in ('Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday')
        for _ in range(2)
    ],
    'Coffee Type': ['Espresso', 'Latte'] * 7,
    'Units Sold': [25, 15, 30, 20, 35, 25, 40, 30, 45, 35, 45, 35, 45, 35],
    'Price Per Unit': [3.99, 4.99] * 7,
}

# The row labels are the same day names as the 'Day' column, so reuse that
# list as the index instead of spelling it out a second time.
coffee = pd.DataFrame(data, index=data['Day'])

In [41]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and approximate memory usage.
coffee.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, Monday to Sunday
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Day             14 non-null     object 
 1   Coffee Type     14 non-null     object 
 2   Units Sold      14 non-null     int64  
 3   Price Per Unit  14 non-null     float64
dtypes: float64(1), int64(1), object(2)
memory usage: 560.0+ bytes


In [42]:
# print("DataFrame Info:\n", df.info(), "\n")  # NOTE: df.info() prints directly and returns None, so wrapping it in print() also prints "None"; call df.info() on its own instead
# print("Statistical Summary:\n", df.describe(), "\n")
# print("Shape of DataFrame: ", df.shape, "\n")
# print("Data Types:\n", df.dtypes, "\n")
# print("Column Names: ", df.columns.tolist(), "\n")  # Convert to list for a cleaner output
# print("Index Details: ", df.index, "\n")
# print("Unique Values in Each Column:\n", df.nunique(), "\n")
# print("The Unique Values in a specific Column:\n", df['A'].unique(), "\n")

# print("Memory Usage by Column (in bytes):\n", df.memory_usage(), "\n")
# print("Missing Values in Each Column:\n", df.isnull().sum(), "\n")

### Loading in data from files

In [64]:
# Load coffee.csv into a DataFrame. This rebinds `coffee`, so any frame
# previously held under that name is replaced by the file's contents.
coffee = pd.read_csv('./warmup-data/coffee.csv')

In [4]:
# Load the two CSV files under ./data into separate DataFrames.
# Paths are relative to the notebook's working directory.
bios = pd.read_csv('./data/bios.csv')
results = pd.read_csv('./data/results.csv')

In [11]:
import pandas as pd
import sys  # NOTE(review): not used in this cell; remove if no later cell needs it
import os  # NOTE(review): not used in this cell; remove if no later cell needs it

# Combine the CSV files into a single Excel workbook, one sheet per file.
file_names = ['bios', 'results']

# Open the writer as a context manager so it is always closed (which is what
# writes the workbook to disk), even if reading a CSV or writing a sheet
# raises part-way through. ExcelWriter.save() no longer exists in pandas 2.x;
# leaving the `with` block takes the place of an explicit close().
with pd.ExcelWriter('./data/olympics-data.xlsx') as writer:  # Arbitrary output name
    for name in file_names:
        # NOTE: this rebinds the notebook-level `df` on every iteration.
        df = pd.read_csv(f"./data/{name}.csv")
        # index=False keeps the DataFrame's row index out of the sheet.
        df.to_excel(writer, sheet_name=name, index=False)

AttributeError: 'OpenpyxlWriter' object has no attribute 'save'

In [66]:
# Preview the first five rows of `bios` to check the columns loaded as expected.
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


## Accessing Specific Data


In [67]:
# Preview the first five rows of `coffee` before selecting and modifying values.
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


.loc

In [69]:
# .loc takes (row selector, column label). Here the row selector is a boolean
# mask that is True for the Monday rows, so only those rows have their
# 'Units Sold' value overwritten. This modifies `coffee` in place.
coffee.loc[coffee['Day'] == "Monday", 'Units Sold'] = 4

# Show the frame to confirm that only the Monday rows changed.
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,4
1,Monday,Latte,4
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


.iloc

.at & .iat