# In [13]:
import ast

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st

# Set the page title shown as the dashboard's main heading
st.title("Bibliometric Analysis Dashboard")

# Step 1: Load the Heart Disease Dataset
@st.cache_data  # Cache the result so Streamlit reruns do not re-read the file
def load_heart_data():
    """Load the heart-disease article dataset from the working directory.

    Reads ``heart disease dataset.xls`` and guarantees that every column the
    dashboard uses exists, filling absent ones with the string ``"N/A"``.

    Returns:
        pandas.DataFrame: The loaded data, or an empty DataFrame when the
        file is missing or cannot be read. In the failure case an error
        message is shown through ``st.error``.
    """
    file_name = "heart disease dataset.xls"
    required_cols = ["Title", "Authors", "Year", "Publisher", "Citations", "Link"]

    # NOTE(review): because this function is cached, a failed load (empty
    # frame) is presumably cached too until the cache is cleared - confirm.
    try:
        # No explicit engine: pandas selects the reader for the workbook
        # format. Forcing 'openpyxl' breaks on legacy .xls workbooks, which
        # openpyxl cannot read (a real .xls needs the xlrd package instead).
        heart_df = pd.read_excel(file_name)
    except FileNotFoundError:
        st.error(f"Error: '{file_name}' not found. Please place the file in the same folder as this script.")
        return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: any read failure is reported in the UI
        # instead of crashing the dashboard.
        st.error(f"Error loading '{file_name}': {e}")
        return pd.DataFrame()

    # Add a placeholder for every expected column the file does not provide
    for col in required_cols:
        if col not in heart_df.columns:
            heart_df[col] = "N/A"
    return heart_df

# Step 2: Load the Author Details Dataset
def _parse_citations_per_year(value):
    """Convert a dict-looking string into a Python object; pass others through.

    Only strings containing ``{`` are parsed. Parsing uses
    ``ast.literal_eval``, which accepts Python literals only, so cell text
    from the spreadsheet can never execute code (unlike ``eval``). Text that
    is not a valid literal is returned unchanged.
    """
    if not (isinstance(value, str) and '{' in value):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError):
        # Malformed cell: keep the raw text rather than failing the whole load
        return value


@st.cache_data  # Cache the result so Streamlit reruns do not re-read the file
def load_author_data():
    """Load the author details dataset from the working directory.

    Reads ``author_details_output.xlsx``, converts the textual
    ``Citations Per Year`` cells into dictionaries where possible, and
    guarantees that every column the dashboard uses exists, filling absent
    ones with the string ``"N/A"``.

    Returns:
        pandas.DataFrame: The loaded data, or an empty DataFrame when the
        file is missing or cannot be read. In the failure case an error
        message is shown through ``st.error``.
    """
    file_name = "author_details_output.xlsx"
    required_cols = ["Name", "Affiliation", "Interests", "Cited by", "H-Index", "i10-Index", "Citations Per Year"]

    try:
        author_df = pd.read_excel(file_name, engine='openpyxl')
    except FileNotFoundError:
        st.error(f"Error: '{file_name}' not found. Please place the file in the same folder as this script.")
        return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: any read failure is reported in the UI
        # instead of crashing the dashboard.
        st.error(f"Error loading '{file_name}': {e}")
        return pd.DataFrame()

    # Convert 'Citations Per Year' from its string form to a dictionary
    if 'Citations Per Year' in author_df.columns:
        author_df['Citations Per Year'] = author_df['Citations Per Year'].apply(
            _parse_citations_per_year
        )

    # Add a placeholder for every expected column the file does not provide
    for col in required_cols:
        if col not in author_df.columns:
            author_df[col] = "N/A"
    return author_df

# Load both datasets up front (heart-disease articles first, then authors)
heart_data, author_data = load_heart_data(), load_author_data()

# Step 3: Topic-wise Bibliometric Analysis (Heart Disease Dataset)
st.header("Topic-wise Bibliometric Analysis")

if not heart_data.empty:
    # "Year" can hold the "N/A" placeholder or other non-numeric text, so
    # coerce it to numbers first; unusable entries become NaN.
    years = pd.to_numeric(heart_data["Year"], errors="coerce")

    if years.notna().any():
        first_year, last_year = int(years.min()), int(years.max())

        # Filter by year using a simple slider
        if first_year < last_year:
            year_range = st.slider("Select Year Range", min_value=first_year,
                                   max_value=last_year,
                                   value=(first_year, last_year))
        else:
            # Only one year present: there is no range to choose from.
            # NOTE(review): st.slider may also reject min_value == max_value
            # - confirm for the installed Streamlit version.
            year_range = (first_year, last_year)

        # Rows with an unusable year are NaN here and so fall outside the range
        in_range = years.between(year_range[0], year_range[1])
        filtered_heart_data = heart_data[in_range]
        filtered_years = years[in_range].astype(int)

        # Show a simple table of the data
        st.subheader("Articles Table")
        st.write(filtered_heart_data)

        # Simple bar chart for articles per year
        st.subheader("Articles Published Per Year")
        articles_per_year = filtered_years.value_counts().sort_index()
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.bar(articles_per_year.index, articles_per_year.values)
        ax.set_xlabel("Year")
        ax.set_ylabel("Number of Articles")
        ax.set_title("Articles Published Per Year")
        st.pyplot(fig)
        plt.close(fig)  # Release the figure so reruns do not pile up open figures

        # Simple average citations per year
        st.subheader("Average Citations Per Year")
        # "Citations" may also hold placeholders, so coerce before averaging
        citations = pd.to_numeric(filtered_heart_data["Citations"], errors="coerce")
        avg_citations_per_year = citations.groupby(filtered_years).mean().dropna()
        if avg_citations_per_year.empty:
            st.write("No citation counts available to display.")
        else:
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.plot(avg_citations_per_year.index, avg_citations_per_year.values, marker='o')
            ax.set_xlabel("Year")
            ax.set_ylabel("Average Citations")
            ax.set_title("Average Citations Per Year")
            st.pyplot(fig)
            plt.close(fig)  # Release the figure so reruns do not pile up open figures
    else:
        st.write("No valid publication years available to display.")

else:
    st.write("No heart disease data available to display.")

# Step 4: Author-wise Bibliometric Analysis
st.header("Author-wise Bibliometric Analysis")

if not author_data.empty:
    # Show author details, one section per row of the author table
    st.subheader("Author Details")
    for _, row in author_data.iterrows():
        st.write(f"**Name:** {row['Name']}")
        st.write(f"Affiliation: {row['Affiliation']}")
        st.write(f"Interests: {row['Interests']}")
        st.write(f"Cited by: {row['Cited by']}")
        st.write(f"H-Index: {row['H-Index']}")
        st.write(f"i10-Index: {row['i10-Index']}")

        # Plot citations per year
        st.subheader("Citations Per Year")
        citations_per_year = row['Citations Per Year']
        # Plot only a non-empty dict; an empty one would draw a blank chart
        if isinstance(citations_per_year, dict) and citations_per_year:
            cite_years = list(citations_per_year.keys())
            cite_counts = list(citations_per_year.values())
            # Use an explicit figure per author rather than pyplot's global
            # "current figure", and close it once rendered so the loop does
            # not leave one open figure per author.
            fig, ax = plt.subplots(figsize=(10, 5))
            ax.plot(cite_years, cite_counts, marker='o')
            ax.set_xlabel("Year")
            ax.set_ylabel("Citations")
            ax.set_title(f"Citations Per Year for {row['Name']}")
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.write("Citations Per Year data not available in the correct format.")

else:
    st.write("No author data available to display.")

# Output captured when the notebook cell was run:
# 2025-06-22 08:50:35.660 No runtime found, using MemoryCacheStorageManager
# 2025-06-22 08:50:35.662 No runtime found, using MemoryCacheStorageManager
# 2025-06-22 08:50:35.667 No runtime found, using MemoryCacheStorageManager
# 2025-06-22 08:50:35.678 No runtime found, using MemoryCacheStorageManager
