In [None]:

import pandas as pd
from plotnine import (
    ggplot, aes, geom_point, geom_abline,
    labs, theme, element_line, scale_color_manual,
    coord_cartesian, xlim, ylim
)
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [9]:

# Load the original dataset.
# A forward slash is used as the path separator so the relative path resolves
# on Windows, Linux and macOS alike (a literal backslash is only a separator
# on Windows); it also matches the "datos/..." paths used by the other cells.
# NOTE(review): the last recorded run of this cell failed with
# "ParserError: ... Expected 1 fields in line 7, saw 18", which suggests the
# file has non-tabular lines before the real header. If that is the case,
# read_csv will need `skiprows=` / `header=` -- confirm against the file.
df = pd.read_csv("datos/music_genre.csv")

# Show the first rows.
df.head()


ParserError: Error tokenizing data. C error: Expected 1 fields in line 7, saw 18


In [None]:

# Columns used by the rest of the notebook, selected once and reused below.
columnas_interes = ['acousticness', 'loudness', 'music_genre']
df_subset = df[columnas_interes]

# Summary statistics for the selected columns.
print("Estadísticas:")
print(df_subset.describe())

# Number of missing values in each selected column.
print("\nNulos por columna:")
print(df_subset.isnull().sum())

# Save the reduced table (without the index) so it can be reloaded from disk.
df_subset.to_csv("datos/music_filtrado.csv", index=False)


In [None]:

# Reload the reduced three-column table from disk.
df_filtrado = pd.read_csv("datos/music_filtrado.csv")

# Fixed colour for each genre label handled by the manual colour scale.
colores_genero = {
    'Electronic': 'lightblue',
    'Pop': 'green',
    'Rock': 'orange',
    'Jazz': 'purple',
}

# Scatter plot of loudness against acousticness, coloured by genre.
# The plot is assembled one component at a time.
plot = ggplot(df_filtrado, aes(x='acousticness', y='loudness', color='music_genre'))
plot = plot + geom_point(shape='o', size=4, fill='white', stroke=1.5, alpha=0.7)
plot = plot + scale_color_manual(values=colores_genero)
plot = plot + labs(
    title='Loudness vs Acousticness por género musical',
    x='Acousticness',
    y='Loudness',
)
# Draw both axis lines in black.
plot = plot + theme(
    axis_line_x=element_line(color='black', size=1),
    axis_line_y=element_line(color='black', size=1),
)

# Last expression of the cell: renders the figure.
plot


In [None]:

# Range shown on both axes.
rango_ejes = (0, 5)

# Plot of a single straight line with intercept 4 and slope -2.
plot_recta = (
    ggplot()
    + geom_abline(intercept=4, slope=-2, color='blue')
    # Limits are set on the coordinate system (without padding) and on the scales.
    + coord_cartesian(xlim=rango_ejes, ylim=rango_ejes, expand=False)
    + xlim(*rango_ejes)
    + ylim(*rango_ejes)
    + labs(title='Gráfica de la recta y = -2x + 4', x='x', y='y')
    # Draw both axis lines in black.
    + theme(
        axis_line_x=element_line(color='black', size=1),
        axis_line_y=element_line(color='black', size=1),
    )
)

# Last expression of the cell: renders the figure.
plot_recta


In [None]:

# Keep only complete rows: a row with a missing feature or a missing genre
# label cannot be used for supervised training. This is a no-op when the
# filtered table has no missing values.
datos_modelo = df_filtrado[['acousticness', 'loudness', 'music_genre']].dropna()

# Features (two numeric columns) and target (genre label).
X = datos_modelo[['acousticness', 'loudness']]
y = datos_modelo['music_genre']

# Encode the genre labels as integers; le.classes_ keeps the original names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split BEFORE scaling so that the scaler never sees the test rows.
# Fitting the scaler on the full dataset would leak test-set statistics
# (mean / standard deviation) into the preprocessing step.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.25, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Fully scaled feature matrix, kept under its original name in case a later
# cell uses it; it is produced with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Train a random forest with a fixed seed so results are reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nReporte de clasificación:\n", classification_report(y_test, y_pred, target_names=le.classes_))
