In [1]:
import pandas as pd
import numpy as np

In [7]:
# Define column names as the dataset doesn't have headers
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# Load the dataset from URL
url = "/Iris.csv"
iris_df = pd.read_csv(url,names=column_names)

# Display first 5 rows
print(iris_df.head())

     sepal_length   sepal_width   petal_length   petal_width      species
Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
1             5.1           3.5            1.4           0.2  Iris-setosa
2             4.9           3.0            1.4           0.2  Iris-setosa
3             4.7           3.2            1.3           0.2  Iris-setosa
4             4.6           3.1            1.5           0.2  Iris-setosa


In [9]:
# Check for missing values
print("\nMissing values in each column:")
print(iris_df.isnull().sum())

# Get initial statistics
print("\nDataset statistics:")
print(iris_df.describe())

# Check dimensions of the dataframe
print("\nDataFrame dimensions:")
print(iris_df.shape)


Missing values in each column:
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

Dataset statistics:
       sepal_length sepal_width petal_length petal_width      species
count           151         151          151         151          151
unique           36          24           44          23            4
top             5.0         3.0          1.5         0.2  Iris-setosa
freq             10          26           14          28           50

DataFrame dimensions:
(151, 5)


In [10]:
# Check data types
print("\nOriginal data types:")
print(iris_df.dtypes)

# All columns except species are already float, which is correct
# Species is object (string), which is correct for categorical data

# No type conversions needed in this case

# For demonstration, if we needed to convert species to category type:
iris_df['species'] = iris_df['species'].astype('category')

print("\nData types after conversion:")
print(iris_df.dtypes)


Original data types:
sepal_length    object
sepal_width     object
petal_length    object
petal_width     object
species         object
dtype: object

Data types after conversion:
sepal_length      object
sepal_width       object
petal_length      object
petal_width       object
species         category
dtype: object


In [11]:
# Convert species (categorical) to quantitative variables using one-hot encoding
iris_quantitative = pd.get_dummies(iris_df, columns=['species'])

# Display the transformed dataframe
print("\nDataFrame with categorical variable converted to quantitative:")
print(iris_df.head())

# Display the one-hot encoded version
print("\nOne-hot encoded version:")
print(iris_quantitative.head())


DataFrame with categorical variable converted to quantitative:
     sepal_length   sepal_width   petal_length   petal_width      species
Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
1             5.1           3.5            1.4           0.2  Iris-setosa
2             4.9           3.0            1.4           0.2  Iris-setosa
3             4.7           3.2            1.3           0.2  Iris-setosa
4             4.6           3.1            1.5           0.2  Iris-setosa

One-hot encoded version:
     sepal_length   sepal_width   petal_length   petal_width  \
Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm   
1             5.1           3.5            1.4           0.2   
2             4.9           3.0            1.4           0.2   
3             4.7           3.2            1.3           0.2   
4             4.6           3.1            1.5           0.2   

    species_Iris-setosa  species_Iris-versicolor  species_Iris-virginica  \
Id   