In [1]:
import pandas as pd
import plotly.express as px


CC = 0.2
CB = 0.4
MM = 0.4


def standardize(column):
    return (column - column.mean()) / column.std()


def chipset_punct(df):
    return standardize(df["core_boost_clock"]) / standardize(df["price"])


def apply_chipset_punct(df):
    df["punct"] = chipset_punct(df)
    return df


def graphics_card_punct(df):
    return CC * standardize(df["core_clock"]) + CB * standardize(df["core_boost_clock"]) + MM * standardize(df["memory"])

# Presentacion PCD

Obtención del DF completo. Primero generamos un dataframe con todas las marcas unidas.


In [2]:
dfs = []
for brand in ["Asus", "Gigabyte", "MSI"]:
    df = pd.read_csv(f"../data/{brand}.csv")
    df["brand"] = brand
    dfs.append(df)

columns = ["price", "memory", "core_clock", "core_boost_clock",
           "chipset", "name", "brand", "user_score", "user_ratings_count"]
df = pd.concat(dfs, ignore_index=True).dropna().reset_index(drop=True)
df = df[df["parent_brand"] == "NVIDIA"].reset_index(drop=True)
df = df[columns]

df.head()

Unnamed: 0,price,memory,core_clock,core_boost_clock,chipset,name,brand,user_score,user_ratings_count
0,2179.99,24576.0,2235.0,2640.0,GeForce RTX 4090,Asus ROG STRIX GAMING OC,Asus,5,12
1,999.99,16384.0,2210.0,2580.0,GeForce RTX 4080 SUPER,Asus TUF GAMING,Asus,4,4
2,2249.99,24576.0,2235.0,2640.0,GeForce RTX 4090,Asus ROG STRIX GAMING OC,Asus,5,7
3,849.0,16384.0,2340.0,2670.0,GeForce RTX 4070 Ti SUPER,Asus TUF GAMING OC,Asus,5,11
4,1149.97,16384.0,2210.0,2640.0,GeForce RTX 4080 SUPER,Asus ProArt OC,Asus,5,6


In [3]:
df.tail()

Unnamed: 0,price,memory,core_clock,core_boost_clock,chipset,name,brand,user_score,user_ratings_count
411,855.0,8192.0,1515.0,1860.0,GeForce RTX 2080,MSI SEA HAWK X,MSI,3,1
412,370.0,6144.0,1500.0,1860.0,GeForce GTX 1660 Ti,MSI ARMOR OC,MSI,0,0
413,788.03,10240.0,1440.0,1740.0,GeForce RTX 3080 10GB LHR,MSI VENTUS 3X OC,MSI,0,0
414,399.0,6144.0,1506.0,1708.0,GeForce GTX 1060 6GB,MSI ARMOR OC,MSI,4,4
415,679.0,11264.0,1480.0,1582.0,GeForce GTX 1080 Ti,MSI Founders Edition,MSI,5,8


## ¿Cuánto cuesta el Hz de core_boost_clock?


In [4]:
df_filtered = df[df["chipset"].str.contains("RTX 40", case=False, na=False)]

df_filtered["boost_clock_per_dollar"] = df_filtered["core_boost_clock"] / df_filtered["price"]

fig = px.box(
    df_filtered.sort_values("boost_clock_per_dollar", ascending=False),
    x="chipset",
    y="boost_clock_per_dollar",
    title="Boost Clock por dolar de las tarjetas gráficas RTX 40 a través de los diferentes vendedores",
    labels={
        "boost_clock_per_dollar": "Clock Speed por Dollar",
        "chipset": "Chipset"
    },
    hover_data=["price", "core_boost_clock", "brand"],
    height=800,
)

fig.update_layout(
    xaxis_title="Chipset",
    yaxis_title="Boost Clock Per Dollar",
    legend_title="Parent Brand",
)

fig.update_traces(marker=dict(color="rgb(118,185,0)"))

fig.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered["boost_clock_per_dollar"] = df_filtered["core_boost_clock"] / df_filtered["price"]


## ¿Cómo se correlacionan las características técnicas de una tarjeta gráfica?


In [5]:
columns_cor = columns[0:4]
matrix = df_filtered[columns_cor].corr()

fig = px.imshow(
    matrix,
    text_auto=True,
    title="Mapa de correlaciones entre las variables",
    labels={"color": "Correlation"},
    x=columns_cor,
    y=columns_cor,
    color_continuous_scale="Viridis",
)

fig.show()

## ¿Cuál es la mejor tarjeta gráfica de cada marca para comprar cierto chipset en relación calidad precio?


In [16]:
chipset = "GeForce RTX 4070 SUPER"

# Get the best graphics card name for each brand for the given chipset
df_filtered = df[df["chipset"] == chipset]
max_per_brand = df_filtered.groupby("brand") \
    .apply(apply_chipset_punct) \
    .reset_index(drop=True) \
    .groupby("brand") \
    .max() \
    .reset_index() \
    .sort_values("punct", ascending=False)

fig = px.bar(
    max_per_brand,
    x="name",
    y="punct",
    title=f"Mejor tarjeta gráfica para el chipset {chipset} de cada vendedor",
    labels={
        "punct": "Puntuación",
        "name": "Tarjeta Gráfica",
    },
    hover_data=["name", "price", "core_clock", "core_boost_clock", "memory"],
    color="brand",
    width=800,
    height=600,
)

fig.show()





## ¿Cuál es la mejor marca para comprar cierto chipset dicho por los usuarios?


In [7]:
chipset_filtered = df[df["chipset"] == chipset][["user_score", "user_ratings_count", "brand"]].dropna()
chipset_filtered = chipset_filtered[chipset_filtered["user_ratings_count"] > 2].reset_index(drop=True)

grouped = chipset_filtered.groupby("brand").mean().sort_values("user_score", ascending=False).reset_index()

fig = px.bar(
    grouped,
    x="brand",
    y="user_score",
    title=f"Media de puntuación de usuario de {chipset}",
    labels={"user_score": "User Score", "brand": "Brand"},
    width=800,
    height=600,
    color="brand",
    hover_data=["user_ratings_count"]
)
fig.update_traces(width=0.5)
fig.show()

## Finalmente, ¿dado cierto presupuesto, cuál es la mejor tarjeta gráfica que puedo comprar?


In [8]:
budget = 1000

chipset_filtered = df[df["price"] <= budget].drop(columns=["user_score", "user_ratings_count"]).dropna()
max_chipsets = chipset_filtered.groupby("chipset")["core_boost_clock"].max().reset_index()
max_chipsets = pd.merge(max_chipsets, chipset_filtered, on=["chipset", "core_boost_clock"]).drop_duplicates().reset_index(drop=True)

max_chipsets["punct"] = graphics_card_punct(max_chipsets)
max_chipsets = max_chipsets.sort_values("punct", ascending=False).iloc[:3]

fig = px.bar(
    max_chipsets,
    x=max_chipsets.apply(lambda row: f"{row['name']} - {row['chipset']}", axis=1),
    y="punct",
    title=f"Mejores tarjetas gráficas por debajo de ${budget}",
    labels={"punct": "Puntuación", "x": "Name - Chipset"},
    hover_data=["price", "core_clock", "core_boost_clock", "memory"],
    color="brand",
)
fig.update_traces(width=0.2)
fig.show()