In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

In [None]:
# Column names for the header-less CSV, in file order.
columns = [
    'sex',
    'length',
    'diameter',
    'height',
    'whole_weight',
    'shucked_weight',
    'viscera_weight',
    'shell_weight',
    'rings',
]
# Every column except the categorical 'sex' is treated as quantitative.
quantitative_columns = [name for name in columns if name != 'sex']

# Load the data and swap the one-letter sex codes for readable labels.
# Series.map turns any code missing from the mapping into NaN.
df = pd.read_csv('data.csv', header=None, names=columns).assign(
    sex=lambda frame: frame['sex'].map({'F': 'Female', 'M': 'Male', 'I': 'Infant'})
)



In [None]:
# Frequency table for the 'sex' column: absolute counts plus percentage share.
counts = df['sex'].value_counts()
percent = df['sex'].value_counts(normalize=True).mul(100)
table = pd.DataFrame(
    {
        'count': counts,
        '%': percent.round(2),  # percentages rounded to two decimals
    }
)
# Clear the index label so only the category names head the rows.
table.index.name = None
table

In [None]:
# Descriptive statistics of the quantitative columns, one row per variable.
numeric_cols = df[quantitative_columns]
# Remove the 'count' statistic, then transpose so variables become rows.
summary_table = numeric_cols.describe().drop(index='count').T
summary_table

In [None]:
# Bar chart with the number of rows in each sex category.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 6))
# countplot returns the Axes it drew on; title and axis labels are set on it directly.
sns.countplot(x='sex', data=df, color='skyblue').set(
    title='Liczba wystąpień dla każdej kategorii płci',
    xlabel='Płeć',
    ylabel='Liczba wystąpień',
)
plt.show()

In [None]:
# One 20-bin histogram per quantitative column, laid out on a 4x2 grid of panels.
fig, axes = plt.subplots(4, 2, figsize=(12, 16))
for col, ax in zip(quantitative_columns, axes.ravel()):
    sns.histplot(data=df, x=col, ax=ax, bins=20, color='red')
    ax.set_title(col)
# Adjust spacing so titles and axis labels of neighbouring panels do not overlap.
fig.tight_layout()
plt.show()

In [None]:
# Scatter plot for every unordered pair of quantitative columns.
pairs = list(combinations(quantitative_columns, 2))

# Size the grid from the number of pairs instead of hard-coding 14x2: zip() below
# stops at the shorter sequence, so a fixed grid would silently drop pairs (or
# leave blank panels) as soon as the column list changes.
n_cols = 2
n_rows = max(1, -(-len(pairs) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(
    nrows=n_rows,
    ncols=n_cols,
    figsize=(16, 4 * n_rows),  # 4 inches of height per row of panels
    squeeze=False,             # keep `axes` two-dimensional even for a single row
)
panels = axes.flatten()

for ax, (x_col, y_col) in zip(panels, pairs):
    sns.scatterplot(data=df, x=x_col, y=y_col, ax=ax, s=10)
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(f'{x_col} vs {y_col}')

# Hide any panel left over when the pairs do not fill the last row.
for unused_ax in panels[len(pairs):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise Pearson correlation (the pandas default method) of the quantitative columns.
correlation_matrix = df.loc[:, quantitative_columns].corr(method='pearson')
correlation_matrix

In [None]:
# Annotated heatmap of the correlation matrix computed in the previous cell.
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',        # same number of decimals in every cell
    cmap='coolwarm',
    # Pin the diverging colormap to the full correlation range so its neutral
    # midpoint sits at 0; otherwise the scale is stretched over the observed
    # min/max and colours cannot be read as sign/strength of the correlation.
    vmin=-1,
    vmax=1,
    linewidths=0.5,
)
plt.title('Heatmapa macierzy korelacji zmiennych ilościowych')
plt.show()

In [None]:
# Scatter plot of diameter against length with a fitted linear regression line (red).
sns.lmplot(
    data=df,
    x='length',
    y='diameter',
    height=6,
    aspect=1.5,
    scatter_kws={'alpha': 0.5},
    line_kws={'color': 'red'},
)
# The title names the variables that are actually plotted; it previously said
# "Whole Weight" although the y variable is 'diameter'.
plt.title('Regresja liniowa: Length vs Diameter')
plt.show()