In [None]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# Generate fake data
np.random.seed(42)  # fixed seed so the generated points are reproducible
num_points = 100

# Create a list of points. Each dict draws x, then y, then z from a standard
# normal distribution, so the random stream is consumed point by point.
points = [
    {
        'x': np.random.normal(0, 1),
        'y': np.random.normal(0, 1),
        'z': np.random.normal(0, 1),
    }
    for _ in range(num_points)
]

# Create scatterplot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Draw every point with ONE scatter call instead of one call (and one artist)
# per point. depthshade=False keeps all markers fully opaque, matching how the
# points looked when each one was drawn as its own single-point collection.
xs = [point['x'] for point in points]
ys = [point['y'] for point in points]
zs = [point['z'] for point in points]
ax.scatter(xs, ys, zs, color="red", marker="x", depthshade=False)

# Set axis labels
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')

# Render the figure explicitly so the cell does not end on a bare Text repr.
plt.show()




In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Maps a dataset column acronym to the human-readable label shown in printed
# summaries and on plot axes/titles. Each label repeats its acronym in
# parentheses, so the text inside the parentheses must match the key.
acronym_to_name = {
    "bpm": "Beats per Minute (bpm)",
    "nrgy": "Energy (nrgy)",
    "dnce": "Danceability (dnce)",
    "val": "Mood (val)",
    "pop": "Popularity (pop)"
}


def analyze_feature_individually(feature):
    """Print the display name, mean and variance of a single feature.

    Args:
        feature: A named numeric series (e.g. one pandas DataFrame column)
            exposing ``.name``, ``.mean()`` and ``.var()``. For a pandas
            Series, ``.var()`` is the sample variance (``ddof=1``).

    The display name is looked up in ``acronym_to_name``; a feature without
    an entry there is reported under its raw ``.name`` instead of ``None``.
    Values are printed with two decimals, followed by a blank line.
    """
    display_name = acronym_to_name.get(feature.name, feature.name)

    print(f"Analyzed Feature: {display_name}")
    print(f"Mean Value: {feature.mean():.2f}")
    print(f"Variance: {feature.var():.2f}\n")

def analyze_features_jointly(feature_one, feature_two):
    """Print the covariance and Pearson correlation between two features.

    Args:
        feature_one: Series-like object exposing ``.corr()`` and ``.cov()``.
        feature_two: The series compared against ``feature_one``.

    Both statistics are printed with two decimals, covariance first.
    """
    correlation = feature_one.corr(feature_two, method='pearson')
    report = (
        ("Covariance", feature_one.cov(feature_two)),
        ("Pearson's Correlation", correlation),
    )

    for label, value in report:
        print(f"{label}: {value:.2f}")
    
def create_plot(feature_one, feature_two):
    """Show a scatter plot of ``feature_one`` (x axis) against ``feature_two`` (y axis).

    Args:
        feature_one: Named numeric series plotted on the x axis.
        feature_two: Named numeric series plotted on the y axis.

    Axis labels and the title come from ``acronym_to_name``; a series whose
    name has no entry there is labelled with its raw ``.name``. Each point is
    colored by ``|x| + |y|`` using the 'plasma' colormap. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    x_label = acronym_to_name.get(feature_one.name, feature_one.name)
    y_label = acronym_to_name.get(feature_two.name, feature_two.name)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Use the function's own arguments here: reading module-level variables
    # instead would make the plot depend on whatever the caller's loop
    # happens to have bound at the time.
    color = np.abs(feature_one) + np.abs(feature_two)
    ax.scatter(feature_one, feature_two, c=color, cmap='plasma')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(f"{x_label} x {y_label}")
    plt.show()
    
    
# Load the data
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where the CSV exists at exactly this location. Consider a path relative
# to a configurable data directory.
PATH_TO_DATA="/Users/germaingirndt/source/machine_learning/prog_exercise_01/task_01/kaggle/Spotify 2010 - 2019 Top 100.csv"
data = pd.read_csv(PATH_TO_DATA)


# Index the columns directly so a missing column raises KeyError right here;
# DataFrame.get() would return None and only fail later inside the helpers.
features = [data[column] for column in ('pop', 'bpm', 'nrgy', 'dnce', 'val')]
pop = features[0]



# Create scatter plots and compute Pearson's correlation coefficients
# features[0] is popularity itself, so start at index 1: comparing popularity
# with itself only yields a diagonal plot and a correlation of 1.00.
for feature in features[1:]:
    create_plot(feature, pop)

    analyze_feature_individually(pop)
    analyze_feature_individually(feature)
    analyze_features_jointly(pop, feature)


    print("\n" + "-" * 100)

    

    


