In [1]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistic: total animals in every Class for each Subphylum.


def count_animals_by_class_and_subphylum(animals: pd.DataFrame) -> pd.DataFrame:
    """Count named animals for every (Class, Subphylum) pair found in ``animals``.

    Parameters
    ----------
    animals : pd.DataFrame
        Frame with at least the columns "Name", "Class" and "Subphylum".

    Returns
    -------
    pd.DataFrame
        Columns "Class", "Subphylum" and "Total Animals", sorted by
        "Total Animals" in descending order. Only pairs with at least one
        non-null "Name" are kept. The frame is empty (but still has the three
        columns) when no pair qualifies.

    Notes
    -----
    Rows whose "Class" or "Subphylum" is missing are ignored, exactly as the
    earlier equality-filter loop ignored them (NaN never compares equal).
    Index labels number the pairs in the order a nested loop over
    first-appearance Class values, then first-appearance Subphylum values,
    would meet them, so the printed index matches the loop-based version.
    """
    # First-appearance position of every key; used only to number the pairs.
    class_rank = {
        value: position
        for position, value in enumerate(animals["Class"].dropna().unique())
    }
    subphylum_rank = {
        value: position
        for position, value in enumerate(animals["Subphylum"].dropna().unique())
    }

    # One pass over the data instead of re-filtering the whole frame for every
    # (Class, Subphylum) combination. sort=False avoids comparing key values,
    # which may be of mixed types.
    counts = (
        animals.groupby(["Class", "Subphylum"], sort=False)["Name"]
        .count()
        .reset_index(name="Total Animals")
    )
    counts = counts[counts["Total Animals"] > 0]

    # Put the pairs in nested-loop order and renumber them 0..n-1.
    counts = (
        counts.assign(
            _class_rank=counts["Class"].map(class_rank),
            _subphylum_rank=counts["Subphylum"].map(subphylum_rank),
        )
        .sort_values(["_class_rank", "_subphylum_rank"])
        .drop(columns=["_class_rank", "_subphylum_rank"])
        .reset_index(drop=True)
    )

    return counts.sort_values(by="Total Animals", ascending=False)


# A notebook kernel runs cells with __name__ == "__main__", so this section
# executes as usual; the guard only keeps the helper above importable
# without the CSV file being present.
if __name__ == "__main__":
    df = pd.read_csv("animals_info new.csv")
    res_df = count_animals_by_class_and_subphylum(df)
    print(res_df)

             Class   Subphylum  Total Animals
11            Aves           0          10114
0         Mammalia  Vertebrata           5879
2         Reptilia  Vertebrata           4273
7       Gastropoda           0           3991
3         Amphibia           0           3393
1         Chordata    Animalia            600
6         Bivalvia           0            515
5      Cephalopoda           0            336
4         Animalia           0            232
9       Scaphopoda           0             13
8   Polyplacophora           0             10
10   Solenogastres           0              1


In [7]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistic: most frequent attributes across animals.


def summarize_attribute_counts(animals: pd.DataFrame, top_n: int = 15) -> pd.DataFrame:
    """Rank the comma-separated tokens of the "Attributes" column by frequency.

    Parameters
    ----------
    animals : pd.DataFrame
        Frame with an "Attributes" column holding comma-separated text.
        Rows where "Attributes" is null are ignored.
    top_n : int, default 15
        Number of most frequent attributes to return.

    Returns
    -------
    pd.DataFrame
        Columns "Attribute", "Total Animals" and "Percentage (%)", sorted by
        "Total Animals" in descending order and cut to ``top_n`` rows. The
        frame is empty (with the same columns) when no attribute is found.

    Notes
    -----
    Tokens are lower-cased and stripped; empty tokens and the literal
    "starts with" are skipped. "Percentage (%)" is relative to the number of
    rows with a non-null "Attributes" value, not to all rows. A token repeated
    within a single row is counted once per occurrence, as in the earlier
    row-by-row loop.
    """
    described = animals.dropna(subset=["Attributes"])
    total_animals_whole = described.shape[0]

    # Vectorised replacement for the iterrows loop: one token per row.
    # astype(str) keeps .str usable even if a cell is not text.
    tokens = (
        described["Attributes"]
        .astype(str)
        .str.lower()
        .str.split(",")
        .explode()
        .str.strip()
    )
    tokens = tokens[(tokens != "") & (tokens != "starts with")]

    # sort_index() gives the alphabetical numbering that a sorted groupby
    # followed by reset_index() produces, so index labels stay the same.
    counts = (
        tokens.value_counts(sort=False)
        .sort_index()
        .rename_axis("Attribute")
        .reset_index(name="Total Animals")
    )
    counts["Percentage (%)"] = (
        counts["Total Animals"] / total_animals_whole * 100
    ).round(2)

    return counts.sort_values(by="Total Animals", ascending=False).head(top_n)


# A notebook kernel runs cells with __name__ == "__main__", so this section
# executes as usual; the guard only keeps the helper above importable
# without the CSV file being present.
if __name__ == "__main__":
    df = pd.read_csv("animals_info new.csv")
    res_df = summarize_attribute_counts(df)
    print(res_df)

         Attribute  Total Animals  Percentage (%)
123    terrestrial          21814           74.31
87   not a migrant          10823           36.87
119         social           2316            7.89
76       migrating           1957            6.67
41         diurnal           1688            5.75
22       carnivore           1516            5.16
84       nocturnal           1288            4.39
28    congregatory           1233            4.20
120       solitary           1149            3.91
61       herbivore           1127            3.84
7         arboreal           1080            3.68
124    territorial            971            3.31
79        monogamy            923            3.14
3        altricial            852            2.90
67    insectivores            730            2.49


In [17]:
import pandas as pd
import statistics
import numpy as np

# Descriptive statistic: top attributes with diet-type counts.


def summarize_attributes_by_diet(animals: pd.DataFrame, top_n: int = 15) -> pd.DataFrame:
    """Rank attributes by frequency and count co-occurring diet types.

    Parameters
    ----------
    animals : pd.DataFrame
        Frame with an "Attributes" column holding comma-separated text.
        Rows where "Attributes" is null are ignored.
    top_n : int, default 15
        Number of most frequent attributes to return.

    Returns
    -------
    pd.DataFrame
        Columns "Attribute", "Total Animals", "Carnivore", "Omnivore" and
        "Herbivore", sorted by "Total Animals" in descending order and cut to
        ``top_n`` rows. Each diet column counts the occurrences of the
        attribute in rows whose own token list contains that diet token.
        The frame is empty (with the same columns) when no attribute is found.

    Notes
    -----
    Tokens are lower-cased and stripped; empty tokens and the literal
    "starts with" are skipped. A token repeated within a single row is
    counted once per occurrence, as in the earlier row-by-row loop.
    """
    diet_columns = ["Carnivore", "Omnivore", "Herbivore"]

    described = animals.dropna(subset=["Attributes"])

    # One cleaned token list per animal; astype(str) keeps .str usable even
    # if a cell is not text.
    token_lists = (
        described["Attributes"]
        .astype(str)
        .str.lower()
        .str.split(",")
        .map(lambda parts: [part.strip() for part in parts])
    )

    records = pd.DataFrame({"Attribute": token_lists, "Total Animals": 1})
    for column in diet_columns:
        diet = column.lower()
        # Flag is decided per animal, then repeated for each of its tokens
        # when the frame is exploded below.
        records[column] = token_lists.map(
            lambda parts, diet=diet: int(diet in parts)
        ).astype(int)

    records = records.explode("Attribute")
    records = records[
        (records["Attribute"] != "") & (records["Attribute"] != "starts with")
    ]

    summary = (
        records.groupby("Attribute")[["Total Animals"] + diet_columns]
        .sum()
        .reset_index()
    )

    return summary.sort_values(by="Total Animals", ascending=False).head(top_n)


# A notebook kernel runs cells with __name__ == "__main__", so this section
# executes as usual; the guard only keeps the helper above importable
# without the CSV file being present.
if __name__ == "__main__":
    df = pd.read_csv("animals_info new.csv")
    final_df = summarize_attributes_by_diet(df)
    print(final_df)

         Attribute  Total Animals  Carnivore  Omnivore  Herbivore
123    terrestrial          21814       1314       586       1081
87   not a migrant          10823        846       414        797
119         social           2316        577       323        690
76       migrating           1957        383        88        141
41         diurnal           1688        794       346        641
22       carnivore           1516       1516        63         73
84       nocturnal           1288        653       218        405
28    congregatory           1233        293        59        100
120       solitary           1149        660       199        305
61       herbivore           1127         73        65       1127
7         arboreal           1080        449       253        436
124    territorial            971        451       206        387
79        monogamy            923        500       187        282
3        altricial            852        398       231        293
67    inse