In [1]:
import pandas as pd 
import numpy as np 
# Console display tuning: allow 120-character lines and show up to 10 columns.
display_options = {'display.width': 120, 'display.max_columns': 10}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)
# =============================
# 1) SOURCE A: Read from WEB (seaborn-data mirror of the Iris dataset)
# =============================
# The CSV is fetched over HTTPS from the seaborn-data GitHub repository, so this
# step needs network access. The file has a header row, which pandas uses for
# the column names.
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
iris_web = pd.read_csv(url)
print("=== WEB READ: First 5 rows ===")
print(iris_web.head(), "\n")
# The seaborn schema (sepal_length, sepal_width, petal_length, petal_width,
# species) is already the naming used by the rest of this script, so no renaming
# is performed. To adopt a different convention, map old names to new ones with
# iris_web.rename(columns={...}) at this point.
# =============================
# 2) Write local TEXT & EXCEL copies (inputs for the read experiments)
#    (Written from the web dataframe so the text/excel reads work on any
#    machine where saving files is allowed.)
# =============================
csv_path, tsv_path, xlsx_path = "iris_text.csv", "iris_text.tsv", "iris_excel.xlsx"
# Comma- and tab-separated text copies; the row index is left out of each file.
for text_path, delimiter in ((csv_path, ","), (tsv_path, "\t")):
    iris_web.to_csv(text_path, sep=delimiter, index=False)
# Single-sheet workbook; the data goes on a sheet named "iris".
iris_web.to_excel(xlsx_path, sheet_name="iris", index=False)
# =============================
# 3) SOURCE B: Read from TEXT (CSV/TSV)
# =============================
# Load both delimited text files, then report the (rows, columns) of each.
iris_csv = pd.read_csv(csv_path, sep=",")
iris_tsv = pd.read_csv(tsv_path, delimiter="\t")
csv_shape, tsv_shape = iris_csv.shape, iris_tsv.shape
print("=== TEXT/CSV READ: shape ===", csv_shape)
print("=== TEXT/TSV READ: shape ===", tsv_shape, "\n")
# =============================
# 4) SOURCE C: Read from EXCEL
# =============================
# Load the "iris" worksheet from the workbook and preview its first three rows.
iris_excel = pd.read_excel(xlsx_path, sheet_name="iris")
excel_preview = iris_excel.head(n=3)
print("=== EXCEL READ: First 3 rows ===")
print(excel_preview, "\n")
# =============================
# 5) Choose a working dataframe for analytics
#    (Use the one read from web; the text/excel copies were written from it.)
# =============================
# Work on a copy so the analytics below operate on their own dataframe object.
df = iris_web.copy()
# Ensure dtype sanity
print("=== INFO ===")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
print()
print("=== DTYPE CHECK ===")
print(df.dtypes, "\n")
# =============================
# 6) DESCRIPTIVE ANALYTICS
# =============================
# a) Summary statistics for the numeric columns
numeric_overview = df.describe()
print("=== DESCRIBE (numeric) ===")
print(numeric_overview, "\n")
# b) Count of missing entries in each column
missing_per_column = df.isna().sum()
print("=== MISSING VALUES BY COLUMN ===")
print(missing_per_column, "\n")
# c) Number of rows per species
species_counts = df['species'].value_counts()
print("=== SPECIES VALUE COUNTS ===")
print(species_counts, "\n")
# d) Per-species mean, std, min and max of each measurement column
stat_names = ['mean', 'std', 'min', 'max']
measurement_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
# Every measurement column gets its own copy of the same list of statistics.
aggregation_plan = {column: list(stat_names) for column in measurement_columns}
group_summary = df.groupby('species').agg(aggregation_plan)
print("=== GROUPED SUMMARY BY SPECIES ===")
print(group_summary, "\n")
# e) Pairwise correlations, restricted to the numeric columns
correlation_matrix = df.corr(numeric_only=True)
print("=== CORRELATION MATRIX (numeric only) ===")
print(correlation_matrix, "\n")
# =============================
# 7) OPTIONAL: Quick percentiles and skew/kurtosis
# =============================
# Select the four measurement columns once and reuse the selection below.
measurements = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
quartile_levels = [0.25, 0.5, 0.75]
q_summary = measurements.quantile(quartile_levels)
print("=== QUARTILES (0.25, 0.5, 0.75) ===")
print(q_summary, "\n")
# Shape of each distribution: skewness first, then kurtosis.
skewness = measurements.skew()
print("=== SKEWNESS ===")
print(skewness, "\n")
kurtosis_values = measurements.kurtosis()
print("=== KURTOSIS ===")
print(kurtosis_values, "\n")

=== WEB READ: First 5 rows ===
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa 

=== TEXT/CSV READ: shape === (150, 5)
=== TEXT/TSV READ: shape === (150, 5) 

=== EXCEL READ: First 3 rows ===
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa 

=== INFO ===
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_