In [1]:
# Import required Python libraries
import pandas as pd
import numpy as np

# Locate open source data from the web
# Example URL from UCI Machine Learning Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Take user input for column names
column_names_input = input("Enter column names separated by commas: ")
column_names = [col.strip() for col in column_names_input.split(",")]

# Load the dataset into pandas dataframe
# Assuming the dataset has headers, if not, you may need to provide column names
df = pd.read_csv(url, names=column_names)

# Data Preprocessing
missing_values = df.isnull().sum()
initial_statistics = df.describe()

# Variable Descriptions, Types, and Dimensions
variable_descriptions = df.dtypes
data_dimensions = df.shape

# Data Formatting and Normalization
# (For this example, assuming all columns are numeric)

# Display the results
print("Missing Values:")
print(missing_values)
print("\nInitial Statistics:")
print(initial_statistics)
print("\nVariable Descriptions:")
print(variable_descriptions)
print("\nData Dimensions:")
print(data_dimensions)

Enter column names separated by commas:  age , class , number , id


Missing Values:
age       0
class     0
number    0
id        0
dtype: int64

Initial Statistics:
              age       class      number
count  150.000000  150.000000  150.000000
mean     3.054000    3.758667    1.198667
std      0.433594    1.764420    0.763161
min      2.000000    1.000000    0.100000
25%      2.800000    1.600000    0.300000
50%      3.000000    4.350000    1.300000
75%      3.300000    5.100000    1.800000
max      4.400000    6.900000    2.500000

Variable Descriptions:
age       float64
class     float64
number    float64
id         object
dtype: object

Data Dimensions:
(150, 4)


In [2]:
# Import required Python libraries
import pandas as pd
import numpy as np

# Locate open source data from the web
# Example URL from UCI Machine Learning Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Take user input for column names
column_names_input = input("Enter column names separated by commas: ")
column_names = [col.strip() for col in column_names_input.split(",")]

# Load the dataset into pandas dataframe
# Assuming the dataset has headers, if not, you may need to provide column names
df = pd.read_csv(url, names=column_names)

# Displaying head and tail of the dataset
print("Head of the dataset:")
print(df.head())
print("\nTail of the dataset:")
print(df.tail())

# Range of the numeric columns in the dataset
numeric_cols = df.select_dtypes(include=[np.number])
data_range = numeric_cols.max() - numeric_cols.min()
print("\nRange of the dataset (numeric columns only):")
print(data_range)

# Checking missing values
missing_values = df.isnull().sum()
print("\nMissing Values:")
print(missing_values)

# Data Formatting and Normalization
# Assuming all columns are numeric for simplicity

# Data Normalization
df_normalized = (numeric_cols - numeric_cols.min()) / (numeric_cols.max() - numeric_cols.min())

# Converting categorical variables into quantitative variables
# Assuming the last column is categorical, change it to quantitative
last_column_name = df.columns[-1]
df_quantitative = pd.get_dummies(df, columns=[last_column_name])

# Display the normalized dataset and converted categorical variables
print("\nNormalized Dataset:")
print(df_normalized.head())

print("\nCategorical Variables converted to Quantitative:")
print(df_quantitative.head())

# Variable Descriptions, Types, and Dimensions
variable_descriptions = df.dtypes
data_dimensions = df.shape

# Displaying the results
print("\nVariable Descriptions:")
print(variable_descriptions)
print("\nData Dimensions:")
print(data_dimensions)

Enter column names separated by commas:  sepal_length, sepal_width, petal_length, petal_width, species


Head of the dataset:
   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

Tail of the dataset:
     sepal_length  sepal_width  petal_length  petal_width         species
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

Range of the dataset (numeric columns only):
sepal_length    3.6
sepal_width     2.4
petal_length 