In [None]:
import pandas as pd
import plotly.express as px

# Read the diamonds CSV directly from the GitHub raw URL.
diamond_data = pd.read_csv(
    "https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv"
)

# Carat on x, price on y; marker size follows carat and colour follows cut.
# update_layout() returns the figure itself, so the captions are chained on.
fig_carat_price = px.scatter(
    diamond_data,
    x="carat",
    y="price",
    size="carat",
    color="cut",
    hover_name="cut",
    title="Carat vs. Price with Cut Quality",
).update_layout(
    xaxis_title="Carat",
    yaxis_title="Price",
    legend_title="Cut Quality",
)

# Render the figure inline.
fig_carat_price.show()

In [None]:
# Mean price for every (color, clarity) combination, flattened back into
# ordinary columns so Plotly Express can consume it.
avg_price_color_clarity = diamond_data.groupby(["color", "clarity"])["price"].mean().reset_index()

# One bar per clarity grade inside each colour grade.
# barmode="group" matters here: the Plotly Express default stacks the bars,
# which would make each bar's height the SUM of the per-clarity averages
# while the axis is labelled "Average Price".
fig_color_clarity = px.bar(avg_price_color_clarity, x="color", y="price", color="clarity",
                           barmode="group",
                           title="Average Price by Color and Clarity",
                           labels={"color": "Color Grade", "price": "Average Price", "clarity": "Clarity Grade"},
                           hover_name="clarity")

# Axis and legend captions (kept explicit even though `labels` sets them too).
fig_color_clarity.update_layout(
    xaxis_title="Color Grade",
    yaxis_title="Average Price",
    legend_title="Clarity Grade"
)

# Show the plot
fig_color_clarity.show()

In [None]:
! pip install dash
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Load the diamond dataset
diamond_data = pd.read_csv("https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv")

# Calculate the average price based on color and clarity grades
avg_price_color_clarity = diamond_data.groupby(["color", "clarity"])["price"].mean().reset_index()

# Initialize the Dash app
app = dash.Dash(__name__)

# Static two-figure dashboard.
# NOTE: fig_carat_price and fig_color_clarity are NOT built in this cell; they
# must already exist in the kernel from the two plotting cells above.
app.layout = html.Div([
    html.H1("Diamond Analysis Dashboard"),

    # Carat vs. Price scatter plot
    dcc.Graph(id="carat-price-scatter", figure=fig_carat_price),

    # Color and Clarity bar chart
    dcc.Graph(id="color-clarity-bar", figure=fig_color_clarity)
])

# Run the Dash app.
# app.run() replaces the deprecated app.run_server(), which current Dash
# releases no longer provide; dash is installed unpinned in this cell, so a
# fresh kernel would otherwise fail here.
if __name__ == "__main__":
    app.run(debug=True)



<IPython.core.display.Javascript object>

In [None]:
# Install necessary packages
!pip install dash gunicorn pandas plotly transformers

# Import libraries
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html, Input, Output, State
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import random
import string
from google.colab.output import eval_js

# Fetch the diamonds CSV from GitHub.
diamond_data = pd.read_csv(
    "https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv"
)

# Mean price per (color, clarity) pair as a flat frame.
avg_price_color_clarity = (
    diamond_data
    .groupby(["color", "clarity"])["price"]
    .mean()
    .reset_index()
)

# Seq2seq checkpoint and matching tokenizer used by generate_summary().
model_name = "google/byt5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create the Dash application object.
app = dash.Dash(__name__)

# Page structure: heading, intro copy, two (initially empty) graphs, a text
# box with a button, and a container the summary callback writes into.
app.layout = html.Div(
    children=[
        html.H1("Diamond Analysis Dashboard"),

        # Introductory copy: one <p> per sentence.
        html.Div(
            [
                html.P(sentence)
                for sentence in (
                    "Welcome to the Diamond Analysis Dashboard!",
                    "The diamond industry is a fascinating and complex field that involves the mining, cutting, trading, and selling of diamonds.",
                    "This dashboard provides insights into various aspects of diamond data, including carat weight, price, cut quality, color, and clarity.",
                    "Explore the visualizations to gain a better understanding of how these factors influence diamond prices and quality.",
                )
            ],
            style={"margin-bottom": "20px"},
        ),

        # Graph placeholders; their figures are supplied by callbacks.
        dcc.Graph(id="carat-price-scatter"),
        dcc.Graph(id="color-clarity-bar"),

        # Free-text input plus the button that triggers summarisation.
        html.Div(
            [
                dcc.Textarea(
                    id="input-text",
                    placeholder="Enter diamond information...",
                    style={"width": "100%", "height": "200px"},
                ),
                html.Button("Generate Summary", id="generate-summary-button", n_clicks=0),
            ],
            style={"margin-top": "20px"},
        ),

        # Target element for the generated summary.
        html.Div(id="summary-output"),
    ]
)

# Define a function to generate summaries
def generate_summary(text):
    """Run ``text`` through the module-level seq2seq model and return the decoded output.

    Args:
        text: Input string; it is truncated to at most 512 tokens.

    Returns:
        The decoded first generated sequence, with special tokens stripped.

    Notes:
        Relies on the module-level ``tokenizer`` and ``model``. Later cells in
        this notebook rebind ``model`` to scikit-learn estimators, so this
        function only works while ``model`` is still the transformers model.
        NOTE(review): confirm the loaded checkpoint is actually fine-tuned for
        summarisation; nothing in this notebook establishes that.
    """
    # A single input needs no padding. The original padded to 512 tokens and
    # then called generate() without the attention mask, so the model attended
    # to the pad tokens as if they were real input.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    summary_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        num_beams=4,
        min_length=30,
        max_length=200,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Callback: fill the summary container once the button has been pressed
@app.callback(
    Output("summary-output", "children"),
    [Input("generate-summary-button", "n_clicks")],
    [State("input-text", "value")]
)
def update_summary(n_clicks, input_text):
    """Return the summary markup for the text box, or an empty Div.

    Args:
        n_clicks: Click count of the "Generate Summary" button.
        input_text: Current contents of the text area (read as State, so
            typing alone does not trigger this callback).

    Returns:
        An ``html.Div`` holding a "Summary:" caption and the generated text
        when the button has been clicked and the text is non-empty; an empty
        ``html.Div`` otherwise.
    """
    # Nothing to do until there has been a click and there is some text.
    if not (n_clicks > 0 and input_text):
        return html.Div()

    generated = generate_summary(input_text)
    return html.Div([html.P("Summary:"), html.P(generated)])

# Define callback to update carat-price scatter plot
@app.callback(
    Output("carat-price-scatter", "figure"),
    Input("color-clarity-bar", "hoverData")
)
def update_carat_price_scatter(hoverData):
    """Supply the figure for the carat/price graph.

    Triggered by hover events on the colour/clarity bar chart. ``hoverData``
    is currently ignored: the same figure is returned on every call.

    Returns:
        The module-level ``fig_carat_price`` figure. It is not created in this
        cell, so it must already exist in the kernel.
    """
    # Placeholder: no hover-dependent logic is implemented yet.
    return fig_carat_price

# Define callback to update color-clarity bar chart
@app.callback(
    Output("color-clarity-bar", "figure"),
    Input("carat-price-scatter", "hoverData")
)
def update_color_clarity_bar(hoverData):
    """Supply the figure for the colour/clarity bar chart.

    Triggered by hover events on the carat/price scatter plot. ``hoverData``
    is currently ignored: the same figure is returned on every call.

    Returns:
        The module-level ``fig_color_clarity`` figure. It is not created in
        this cell, so it must already exist in the kernel.
    """
    # Placeholder: no hover-dependent logic is implemented yet.
    return fig_color_clarity

# Run the Dash app
if __name__ == "__main__":
    dash_port = 8050

    # The server runs on the Colab VM, so "http://localhost:8050" opened in the
    # reader's own browser cannot reach it (and no ngrok tunnel is started
    # anywhere in this notebook). Ask Colab for the proxied URL of the port
    # instead and print it so it can be clicked once the server is up.
    proxy_url = eval_js(f"google.colab.kernel.proxyPort({dash_port})")
    print(f"Dashboard URL: {proxy_url}")

    # app.run() replaces the deprecated app.run_server(), which current Dash
    # releases no longer provide. This call blocks until interrupted.
    app.run(debug=False, port=dash_port)



<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import plotly.express as px

# Load the diamond dataset from the provided URL
diamond_data = pd.read_csv("https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv")

# Split data into features and target.
# get_dummies one-hot encodes any object/string/category columns (other cells
# treat cut, color and clarity as labels) so the estimator receives a purely
# numeric matrix; numeric columns pass through unchanged, so this is a no-op
# if the CSV is already fully numeric.
X = pd.get_dummies(diamond_data.drop(columns=['price']))
y = diamond_data['price']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest regression model.
# NOTE: this rebinds the global name `model`, which generate_summary() in the
# summarisation dashboard cell also relies on.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Visualize the predicted vs. actual prices
fig = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'}, title='Actual vs. Predicted Diamond Prices')
fig.show()

Mean Squared Error: 1878614.8608097557


In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
import plotly.express as px

# Load the diamond dataset from the provided URL
diamond_data = pd.read_csv("https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv")

# Build the clustering matrix in a separate frame so the original labels in
# diamond_data stay intact for the plot (legend and axis show the real grade
# names instead of integer codes). One encoder per column is kept so the
# code -> label mapping can be recovered with inverse_transform.
X = diamond_data[['carat', 'cut', 'color', 'clarity']].copy()
label_encoders = {}
for column in ['cut', 'color', 'clarity']:
    label_encoders[column] = LabelEncoder()
    X[column] = label_encoders[column].fit_transform(X[column])

# NOTE(review): features are clustered unscaled and the label codes are
# assigned in sorted order rather than by grade quality; confirm that is the
# intended distance metric.

# Perform K-means clustering. n_init is pinned explicitly because its default
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Store the cluster id as a string so Plotly colours it as a discrete
# category rather than drawing a continuous colour bar over ids 0..2.
diamond_data['cluster'] = kmeans.fit_predict(X).astype(str)

# Visualize the clusters (size_max was dropped: it has no effect without `size`).
fig = px.scatter_3d(diamond_data, x='carat', y='color', z='price', color='cluster',
                    symbol='cut', opacity=0.7, title='Diamond Clusters')
fig.show()





In [None]:
import pandas as pd
import plotly.express as px

# Fetch the diamonds CSV from GitHub.
diamond_data = pd.read_csv(
    "https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv"
)

# Bubble chart: carat (log-scaled x) against price, bubble area from depth,
# colour from cut. update_layout() returns the same figure, so the dark theme
# and captions are chained on; its title replaces the one passed to scatter().
fig = px.scatter(
    diamond_data,
    x='carat',
    y='price',
    size='depth',
    color='cut',
    hover_name='color',
    log_x=True,
    size_max=60,
    title='Diamond Data Bubble Chart',
).update_layout(
    template='plotly_dark',
    xaxis_title='Carat',
    yaxis_title='Price',
    title='Diamond Data Analysis',
)

fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Load the diamond dataset
diamond_data = pd.read_csv("https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv")

# Bubble chart of colour grade vs. cut: bubble size from carat, colour from price.
# The red colour scale is requested here via color_continuous_scale. Setting
# marker.colorscale on the traces afterwards has no effect, because Plotly
# Express binds a continuous colour to the figure's shared coloraxis.
# The title is set once, in update_layout below.
fig = px.scatter(diamond_data,
                 x='color',
                 y='cut',
                 size='carat',
                 color='price',
                 hover_name='clarity',
                 size_max=60,
                 color_continuous_scale='Reds')

# Update layout for better visuals
fig.update_layout(
    template='simple_white',
    xaxis_title='Diamond Color',
    yaxis_title='Diamond Cut',
    title='Diamond Data Analysis',
    title_font=dict(size=24, family='Arial, bold', color='black'),
    xaxis=dict(
        title_font=dict(size=18, family='Arial, bold', color='black'),
        tickfont=dict(size=14, family='Arial', color='black')
    ),
    yaxis=dict(
        title_font=dict(size=18, family='Arial, bold', color='black'),
        tickfont=dict(size=14, family='Arial', color='black')
    ),
    legend=dict(
        title_font=dict(size=16, family='Arial, bold', color='black'),
        font=dict(size=14, family='Arial', color='black')
    )
)

# Marker outline and transparency
fig.update_traces(marker=dict(
    line=dict(width=1, color='DarkSlateGrey'),
    opacity=0.8
))

fig.show()

In [None]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Fetch the diamonds CSV from GitHub.
diamond_data = pd.read_csv(
    "https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv"
)

# Create the Dash application object.
app = dash.Dash(__name__)

# Page structure: heading, five filter controls, then the two graphs that the
# update_graphs callback fills in.
app.layout = html.Div(
    style={'backgroundColor': 'white'},
    children=[
        html.H1(
            "Diamond Analysis Dashboard",
            style={'textAlign': 'center', 'color': 'black'},
        ),

        # Single-select cut quality (cannot be cleared).
        dcc.Dropdown(
            id='cut-dropdown',
            options=[dict(label=name, value=name) for name in diamond_data['cut'].unique()],
            value='Ideal',
            clearable=False,
            style={'margin-bottom': '20px'},
        ),

        # Multi-select colour grades; every grade selected initially.
        dcc.Dropdown(
            id='color-dropdown',
            options=[dict(label=grade, value=grade) for grade in diamond_data['color'].unique()],
            value=diamond_data['color'].unique().tolist(),
            multi=True,
            style={'margin-bottom': '20px'},
        ),

        # Clarity check boxes; every grade ticked initially.
        dcc.Checklist(
            id='clarity-checklist',
            options=[dict(label=grade, value=grade) for grade in diamond_data['clarity'].unique()],
            value=diamond_data['clarity'].unique().tolist(),
            style={'margin-bottom': '20px'},
        ),

        # Optional numeric carat filter.
        dcc.Input(
            id='carat-input',
            type='number',
            placeholder='Enter carat weight',
            style={'margin-bottom': '20px'},
        ),

        # Upper price bound; starts at the dataset maximum, tick label every 5000.
        dcc.Slider(
            id='price-slider',
            min=diamond_data['price'].min(),
            max=diamond_data['price'].max(),
            value=diamond_data['price'].max(),
            marks={
                tick: f'${tick}'
                for tick in range(0, int(diamond_data['price'].max()), 5000)
            },
            step=500,
        ),

        # Graph placeholders; figures come from the callback.
        dcc.Graph(id='scatter-plot'),
        dcc.Graph(id='histogram'),
    ],
)

def _filter_diamonds(data, cut, colors, clarities, carat, max_price):
    """Return the rows of ``data`` that satisfy every active dashboard filter."""
    # A cleared multi-select / checklist can arrive as None; treat it as
    # "nothing selected" instead of letting isin(None) raise a TypeError.
    mask = (
        (data['cut'] == cut)
        & data['color'].isin(colors or [])
        & data['clarity'].isin(clarities or [])
        & (data['price'] <= max_price)
    )
    if carat:
        # Compare with a small tolerance: the CSV parser and the browser can
        # produce floats that differ in the last bit for the same decimal
        # text, so exact equality may silently drop matching rows.
        mask &= (data['carat'] - carat).abs() < 1e-9
    return data[mask]


@app.callback(
    [Output('scatter-plot', 'figure'),
     Output('histogram', 'figure')],
    [Input('cut-dropdown', 'value'),
     Input('color-dropdown', 'value'),
     Input('clarity-checklist', 'value'),
     Input('carat-input', 'value'),
     Input('price-slider', 'value')]
)
def update_graphs(selected_cut, selected_colors, selected_clarities, carat_weight, price_range):
    """Rebuild both graphs from the current filter controls.

    Args:
        selected_cut: Cut chosen in the single-select dropdown.
        selected_colors: Colour grades chosen in the multi-select (may be
            empty or None when cleared).
        selected_clarities: Clarity grades ticked in the checklist (may be
            empty or None).
        carat_weight: Optional carat value; a falsy value (empty input or 0)
            disables the carat filter.
        price_range: Slider value, used as an upper bound on price (a single
            number, despite the name).

    Returns:
        A ``(scatter_fig, histogram_fig)`` tuple built from the filtered rows
        of the module-level ``diamond_data``.
    """
    filtered_data = _filter_diamonds(
        diamond_data, selected_cut, selected_colors, selected_clarities,
        carat_weight, price_range,
    )

    # color_continuous_scale only takes effect if the `color` column is numeric.
    scatter_fig = px.scatter(filtered_data, x='carat', y='price', color='color',
                             title='Carat vs. Price', color_continuous_scale='Reds')

    histogram_fig = px.histogram(filtered_data, x='price', nbins=50,
                                 title='Price Distribution', color_discrete_sequence=['red'])

    return scatter_fig, histogram_fig

# Start the development server.
# app.run() replaces the deprecated app.run_server(), which current Dash
# releases no longer provide.
if __name__ == '__main__':
    app.run(debug=True)

<IPython.core.display.Javascript object>

In [None]:
# Mount Google Drive inside the Colab VM at /content/Dads_Tools.
# NOTE(review): nothing else in this cell reads from that path; confirm the
# mount is still needed.
from google.colab import drive
drive.mount('/content/Dads_Tools')

# Install the dashboard dependencies into the kernel's environment.
# NOTE(review): versions are unpinned, so a fresh run may pull releases with
# different APIs than the ones this notebook was written against.
%pip install dash pandas dash_bootstrap_components dash_core_components plotly transformers

# NOTE: this message is emitted right after the install step; the actual
# import statements only run further down in this cell.
print("Libraries imported successfully!")

import dash
import pandas as pd
import dash_bootstrap_components as dbc
from dash import dcc
import plotly.graph_objects as go
from dash import html, dash_table
from dash.dependencies import Input, Output, State
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the diamond dataset
diamond_data_url = "https://raw.githubusercontent.com/Jeniejean/Applied-Stat/main/Diamond_Data%20(3).csv"
diamond_df = pd.read_csv(diamond_data_url)

# Split dataset into features and target variable.
# get_dummies one-hot encodes any object/string/category columns so the
# regression receives a purely numeric matrix; numeric columns pass through
# unchanged, so this is a no-op if the CSV is already fully numeric.
X = pd.get_dummies(diamond_df.drop(columns=['price']))
y = diamond_df['price']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate model performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Initialize Dash app with the Bootstrap "Materia" theme
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.MATERIA])

# Define layout: currently only the page heading; the prediction controls
# have not been added yet.
app.layout = dbc.Container([
    html.H1("Diamond Price Prediction", className='Noto_regular_font', style={'textAlign': 'center', 'marginBottom': '15px', 'fontWeight': 'bold', 'fontSize': '35px'}),

    # Add your Dash components here

])

# app.run() replaces the deprecated app.run_server(), which current Dash
# releases no longer provide; dash is installed unpinned in this cell.
if __name__ == "__main__":
    app.run(debug=True)

Mounted at /content/Dads_Tools
Collecting dash_bootstrap_components
  Downloading dash_bootstrap_components-1.6.0-py3-none-any.whl (222 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m222.5/222.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dash_bootstrap_components
Successfully installed dash_bootstrap_components-1.6.0
Libraries imported successfully!
Mean Squared Error: 1130186.013673885


<IPython.core.display.Javascript object>