<a href="https://colab.research.google.com/github/OlieverGuadalupe/NASA-HACKATHON/blob/main/NASA_HACKATHON.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install dash



In [None]:
!pip install --upgrade openai



# Main Code

In [None]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import re # Import the re module for regular expressions

# Publications table including the generated 'Summary' column. This CSV is
# written by the summarization cell further down in this notebook, so that
# cell must have been run at least once before this one.
df = pd.read_csv("hackaton_summary_local.csv")
# Trim and title-case the header so columns can be addressed as 'Organism', 'Year', ...
df.columns = [column.strip().title() for column in df.columns]

app = dash.Dash(__name__)

# --- Page header -----------------------------------------------------------
page_header = [
    html.H1("🚀 NASA Bioscience Research Dashboard", style={'textAlign': 'center'}),
    html.P("Explore NASA bioscience research papers and trends over the years.", style={'textAlign': 'center'}),
]

# --- Filter controls -------------------------------------------------------
# Free-text search applied to the abstracts by the callback.
keyword_input = dcc.Input(
    id='keyword_search',
    type='text',
    placeholder='Search keywords in abstract...',
    style={'width': '60%', 'display': 'block', 'margin': '10px auto'},
)

# One dropdown entry per distinct, non-missing organism in the data.
organism_options = [
    {'label': organism, 'value': organism}
    for organism in df['Organism'].dropna().unique()
]
organism_dropdown = dcc.Dropdown(
    id='organism_filter',
    options=organism_options,
    placeholder="Select an organism to filter",
    clearable=True,
    style={'width': '80%', 'display': 'block', 'margin': '10px auto'},
)

filter_panel = html.Div(
    [keyword_input, organism_dropdown],
    style={'width': '60%', 'margin': '20px auto', 'textAlign': 'center'},
)

# --- Results area: filled in by the callback -------------------------------
summary_panel = html.Div(
    id='summary_box',
    style={
        'border': '1px solid #ccc',
        'padding': '15px',
        'borderRadius': '8px',
        'marginTop': '20px',
        'backgroundColor': '#f9f9f9',
    },
)

app.layout = html.Div([
    *page_header,
    filter_panel,
    dcc.Graph(id='publications_per_year'),
    summary_panel,
])


def highlight_keyword(text, keyword):
    """Wrap every occurrence of ``keyword`` in ``text`` with ``<b>...</b>`` tags.

    Matching is case-insensitive and literal: regex metacharacters in
    ``keyword`` carry no special meaning. The text that actually matched is
    what gets wrapped, so the capitalisation found in ``text`` is preserved.

    Args:
        text: String to search. Non-string values are returned unchanged.
        keyword: Search term. An empty or ``None`` keyword disables highlighting.

    Returns:
        ``text`` with each match wrapped in bold tags, or ``text`` unchanged
        when there is nothing to highlight.
    """
    if not keyword or not isinstance(text, str):
        return text
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    # A callable replacement re-emits the matched text verbatim and, unlike a
    # replacement string, is never parsed as a template (so backslashes in the
    # keyword cannot raise or be reinterpreted).
    return pattern.sub(lambda match: f"<b>{match.group(0)}</b>", text)


def _cell_text(row, column, default):
    """Return ``row[column]`` as text, or ``default`` when the cell is absent or NaN."""
    value = row.get(column, default)
    return default if pd.isna(value) else str(value)


def _with_bold_matches(text, keyword):
    """Split ``text`` into Dash children, wrapping literal, case-insensitive ``keyword`` matches in ``html.B``."""
    if not keyword:
        return [text]
    # With a single capture group, re.split alternates non-match / match
    # pieces, so every odd index holds text that matched the keyword.
    pieces = re.split(f"({re.escape(keyword)})", text, flags=re.IGNORECASE)
    return [
        html.B(piece) if index % 2 else piece
        for index, piece in enumerate(pieces)
        if piece
    ]


@app.callback(
    [Output('publications_per_year', 'figure'),
     Output('summary_box', 'children')],
    [Input('organism_filter', 'value'),
     Input('keyword_search', 'value')]
)
def update_dashboard(selected_organism, search_keyword):
    """Rebuild the bar chart and the publication cards for the current filters.

    Args:
        selected_organism: Value of the organism dropdown, or ``None`` when cleared.
        search_keyword: Text typed into the search box, or ``None``/empty.
            It is matched literally and case-insensitively against 'Abstract'.

    Returns:
        A ``(figure, children)`` pair: the publications-per-year bar chart and
        the list of components shown in the summary box.
    """
    # Every step below derives a new frame, so the notebook-level ``df`` is never mutated.
    filtered_df = df

    if selected_organism:
        filtered_df = filtered_df[filtered_df['Organism'] == selected_organism]

    if search_keyword:
        # regex=False: the search box holds plain text, so characters such as
        # "(" or "+" must not be interpreted as a regular expression.
        keyword_hit = filtered_df['Abstract'].str.contains(
            search_keyword, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[keyword_hit]

    # Keep only rows whose Year parses as a number, and take the integer year
    # from the parsed values so entries like "2019.0" are accepted as well.
    year_numeric = pd.to_numeric(filtered_df['Year'], errors='coerce')
    has_year = year_numeric.notna()
    filtered_df = filtered_df[has_year].assign(Year=year_numeric[has_year].astype(int))

    papers_per_year = filtered_df.groupby('Year').size().reset_index(name='Count')
    chart_title = 'Publications Per Year'
    if selected_organism:
        chart_title = f'{chart_title} — {selected_organism}'
    fig = px.bar(
        papers_per_year,
        x='Year',
        y='Count',
        title=chart_title,
        color='Count',
        color_continuous_scale='Blues'
    )

    if filtered_df.empty:
        return fig, [html.P("No publications found matching your criteria.")]

    summaries_list = []
    for _, row in filtered_df.iterrows():
        abstract_text = _cell_text(row, 'Abstract', 'No abstract')
        summary_text = _cell_text(row, 'Summary', 'No summary')

        # Without a search term the abstract is shortened to a 250-character
        # preview; with one, the full abstract is shown so every hit is visible.
        if not search_keyword and len(abstract_text) > 250:
            abstract_text = f"{abstract_text[:250]}..."

        # Dash renders string children as plain text, so highlighting is
        # expressed with html.B components rather than "<b>" markup in a string.
        summaries_list.append(html.Div([
            html.H4(f"📘 {_cell_text(row, 'Title', 'No title')}", style={'marginBottom': '5px'}),
            html.P(f"👩‍🔬 Author: {_cell_text(row, 'Author', 'N/A')}", style={'margin': '0'}),
            html.P(f"📚 Source: {_cell_text(row, 'Source', 'N/A')}", style={'margin': '0'}),
            html.P(
                ["🧬 Abstract: ", *_with_bold_matches(abstract_text, search_keyword)],
                style={'marginTop': '5px', 'fontStyle': 'italic'}
            ),
            html.P(
                ["📝 Summary: ", *_with_bold_matches(summary_text, search_keyword)],
                style={'marginTop': '5px', 'fontWeight': 'bold'}
            ),
            html.Hr()
        ]))

    return fig, summaries_list


# Start the Dash server only when this code runs as the main module (not when
# it is imported); debug=True is passed through to Dash's run() to enable its debug mode.
if __name__ == "__main__":
    app.run(debug=True)

<IPython.core.display.Javascript object>

In [None]:
!pip install transformers



In [None]:
from transformers import pipeline
import pandas as pd

# Local summarization model, loaded through the transformers pipeline helper.
# A smaller, faster checkpoint is used for demonstration purposes; switch the
# constant to 'facebook/bart-large-cnn' if you prefer, at the cost of a longer
# download and run time.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-6-6"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)

# Raw publications table; headers are trimmed and title-cased so the code
# below can look up the 'Abstract' column by that exact name.
# NOTE(review): this rebinds the notebook-level `df` that the dashboard cell
# also reads — confirm that sharing the name is intended.
df = pd.read_csv("hackaton.csv")
df.columns = [column.strip().title() for column in df.columns]

def summarize_local(text):
    """Summarise one abstract with the notebook-level ``summarizer`` pipeline.

    Args:
        text: Abstract text. Missing values (NaN/None) and blank strings are
            not sent to the model.

    Returns:
        The generated summary; ``"No abstract available."`` when there is no
        text to summarise; or ``"Error summarizing abstract."`` when the
        pipeline call raises (the error is printed).
    """
    if pd.isna(text) or not str(text).strip():
        return "No abstract available."
    try:
        # Adjust max_length and min_length as needed for your desired summary length.
        # truncation=True clips inputs that exceed the model's maximum input
        # length instead of letting the call fail, so long abstracts are
        # summarised from their leading portion.
        summary = summarizer(
            text,
            max_length=60,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        # Best effort: one failing abstract must not abort the whole batch.
        print(f"Error summarizing abstract: {e}")
        return "Error summarizing abstract."

# Guard first: without an 'Abstract' column there is nothing to summarise.
if 'Abstract' not in df.columns:
    print("⚠️ Error: 'Abstract' column not found in the dataframe.")
else:
    # One generated summary per row, stored next to the original columns.
    df["Summary"] = df["Abstract"].apply(summarize_local)
    # Persist the augmented table to a new CSV (row index omitted).
    df.to_csv("hackaton_summary_local.csv", index=False)
    print("✅ Summaries generated locally and saved to hackaton_summary_local.csv")

Device set to use cpu


✅ Summaries generated locally and saved to hackaton_summary_local.csv
