# Pandas Basics

## 📥 Download or Load Iris Dataset (CSV)

In [1]:
import pandas as pd

# Read the Iris measurements directly from the UCI repository. To work offline,
# point `url` at a local copy of iris.data instead; read_csv accepts a path too.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Column labels are supplied here via `names`; header=None (what pandas assumes
# anyway whenever `names` is given) makes the first line of the file be read as data.
columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
iris = pd.read_csv(url, header=None, names=columns)


## 1. Core Data Structures

In [2]:
# Show, one per line: the container type, the dtype of every column,
# and the column labels as a plain Python list.
print(
    type(iris),
    iris.dtypes,
    list(iris.columns),
    sep="\n",
)


<class 'pandas.core.frame.DataFrame'>
sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object
['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']


## 2. Reading & Writing Data

In [3]:
# Round-trip the frame through a CSV file in the working directory.
# index=False keeps the row index out of the file, so reading it back
# yields the same columns rather than an extra unnamed index column.
csv_path = "iris_copy.csv"
iris.to_csv(csv_path, index=False)
iris_loaded = pd.read_csv(csv_path)


## 3. Data Selection & Indexing

In [4]:
# Bracket selection on the first five rows: a single label gives a Series,
# a list of labels gives a DataFrame.
first_rows = iris.head()
print(first_rows['sepal_length'])
print(first_rows[['sepal_length', 'species']])

# .iloc slices by position and excludes the stop (rows 0-4, columns 0-1);
# .loc slices by label and includes the stop (rows labelled 0 through 4).
print(iris.iloc[:5, :2])
print(iris.loc[:4, ['sepal_width', 'species']])


0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: sepal_length, dtype: float64
   sepal_length      species
0           5.1  Iris-setosa
1           4.9  Iris-setosa
2           4.7  Iris-setosa
3           4.6  Iris-setosa
4           5.0  Iris-setosa
   sepal_length  sepal_width
0           5.1          3.5
1           4.9          3.0
2           4.7          3.2
3           4.6          3.1
4           5.0          3.6
   sepal_width      species
0          3.5  Iris-setosa
1          3.0  Iris-setosa
2          3.2  Iris-setosa
3          3.1  Iris-setosa
4          3.6  Iris-setosa


## 4. Data Cleaning

In [5]:
# The sepal-length column is called 'sepal_length' on a fresh kernel but
# 'sepal_len' once this cell has run (see the rename below). Resolve whichever
# name is present so that re-running the cell updates the existing column.
# Otherwise .loc would create a brand-new 'sepal_length' column, and the rename
# would turn it into a duplicate 'sepal_len' that breaks every later
# iris['sepal_len'] lookup.
sepal_col = 'sepal_length' if 'sepal_length' in iris.columns else 'sepal_len'

iris.loc[0, sepal_col] = None                      # Introduce a missing value in row 0
print(iris.isnull().sum())                         # Count missing values per column
iris_filled = iris.fillna(0)                       # New frame with NaN replaced by 0
iris_dropped = iris.dropna()                       # New frame without rows containing NaN

# rename skips keys that are not present, so this is a no-op on a re-run.
iris = iris.rename(columns={'sepal_length': 'sepal_len'})  # Rename
# petal_width is already float64, so this cast changes nothing; it only demonstrates astype.
iris['petal_width'] = iris['petal_width'].astype(float)    # Change dtype

sepal_length    1
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64


## 5. Data Manipulation

In [10]:
# Add a derived column from two existing ones ...
iris['sepal_plus_petal'] = iris['sepal_len'] + iris['petal_length']

# ... and remove it again; drop returns a new frame, so rebind the name.
iris = iris.drop('sepal_plus_petal', axis=1)

# Vectorised string method via the .str accessor.
iris['species_upper'] = iris['species'].str.upper()

# Element-wise lambda through apply: missing values are passed through untouched,
# everything else is squared. (iris['sepal_len'] ** 2 would give the same values,
# since NaN propagates through arithmetic; apply is used here to show the lambda form.)
iris['squared_len'] = iris['sepal_len'].apply(
    lambda length: length if pd.isnull(length) else length ** 2
)


## 6. Aggregation & Grouping

In [7]:
# Per-species summary. A string requests one statistic for a column; a list
# requests several, which is why the result has two-level column labels.
# mean skips NaN, so the value blanked out in the cleaning cell is not counted.
summary_spec = {'sepal_len': 'mean', 'petal_length': ['mean', 'max']}
grouped = iris.groupby('species').agg(summary_spec)
print(grouped)

# Number of rows for each species.
species_counts = iris['species'].value_counts()
print(species_counts)


                sepal_len petal_length     
                     mean         mean  max
species                                    
Iris-setosa      5.004082        1.464  1.9
Iris-versicolor  5.936000        4.260  5.1
Iris-virginica   6.588000        5.552  6.9
species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64


## 7. Sorting & Ranking

In [11]:
# Largest sepal_len first. sort_values returns a new frame, so `iris` itself
# keeps its original row order.
iris_sorted = iris.sort_values('sepal_len', ascending=False)

# Rank within the column using pandas' defaults, spelled out for clarity:
# tied values share the average of their ranks and NaN entries stay NaN.
iris['sepal_rank'] = iris['sepal_len'].rank(method='average', na_option='keep')


## 8. Merging & Joining

In [12]:
# Small lookup table that assigns an integer label to each species name.
species_names = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
label_df = pd.DataFrame({'species': species_names, 'label': [0, 1, 2]})

# Left join on 'species': every row of iris is kept and gains the matching
# 'label'; a species absent from label_df would end up with a missing label.
merged_df = pd.merge(left=iris, right=label_df, how='left', on='species')