#### Does the successful grant depend on the length of the proposal? 

In [8]:
import pandas as pd
import plotly.graph_objs as go

# Load the datasets
topics_df = pd.read_csv('stip_topics_data.csv')
detail_df = pd.read_csv('stip_detail_data_cleaned.csv')

# Convert "Topic ID" to string/object type in both datasets
topics_df['Topic ID'] = topics_df['Topic ID'].astype(str)
detail_df['Topic ID'] = detail_df['Topic ID'].astype(str)

# Merge datasets based on Topic ID
merged_df = pd.merge(topics_df, detail_df, on='Topic ID')

# Filter successful and unsuccessful proposals
successful_proposals = merged_df[merged_df['Status'] == 'Passed']
unsuccessful_proposals = merged_df[merged_df['Status'] != 'Passed']

# Calculate the average length of successful and unsuccessful proposals in terms of characters and words
avg_char_length_successful = successful_proposals['Description'].str.replace(' ', '').apply(len).mean()
avg_char_length_unsuccessful = unsuccessful_proposals['Description'].str.replace(' ', '').apply(len).mean()
avg_word_length_successful = successful_proposals['Description'].str.split().apply(len).mean()
avg_word_length_unsuccessful = unsuccessful_proposals['Description'].str.split().apply(len).mean()

# Create grouped bar chart for average character count and average word count
categories = ['Successful', 'Unsuccessful']
char_counts = [round(avg_char_length_successful), round(avg_char_length_unsuccessful)]
word_counts = [round(avg_word_length_successful), round(avg_word_length_unsuccessful)]

fig = go.Figure(data=[
    go.Bar(name='Average Character Count', x=categories, y=char_counts),
    go.Bar(name='Average Word Count', x=categories, y=word_counts)
])

# Update layout
fig.update_layout(barmode='group', title='Average Character and Word Count of Proposal Descriptions',
                  xaxis=dict(title='Proposal Status'), yaxis=dict(title='Average Count'))

# Show the grouped bar chart
fig.show()
fig.write_html("Impact of Proposal Length on Grant Success.html")


#### How important is defining the milestones clearly?

In [9]:
import pandas as pd
import plotly.graph_objs as go

# Load the datasets
topics_df = pd.read_csv('stip_topics_data.csv')
detail_df = pd.read_csv('stip_detail_data_cleaned.csv')

# Convert "Topic ID" to string/object type in both datasets
topics_df['Topic ID'] = topics_df['Topic ID'].astype(str)
detail_df['Topic ID'] = detail_df['Topic ID'].astype(str)

# Merge datasets based on Topic ID
merged_df = pd.merge(topics_df, detail_df, on='Topic ID')

# Extract milestone descriptions from the "Description" column
merged_df['Milestone Descriptions'] = merged_df['Description'].str.extract(r'Milestone Descriptions: (.*)')

# Calculate the average length of milestone descriptions for successful and unsuccessful proposals
avg_char_length_successful = merged_df[merged_df['Status'] == 'Passed']['Milestone Descriptions'].dropna().str.replace(' ', '').apply(len).mean()
avg_char_length_unsuccessful = merged_df[merged_df['Status'] != 'Passed']['Milestone Descriptions'].dropna().str.replace(' ', '').apply(len).mean()

# Create a grouped bar chart for average character count of milestone descriptions
categories = ['Successful', 'Unsuccessful']
char_counts = [round(avg_char_length_successful), round(avg_char_length_unsuccessful)]

fig = go.Figure(data=[go.Bar(name='Average Character Count', x=categories, y=char_counts)])

# Update layout
fig.update_layout(barmode='group', title='Average Character Count of Milestone Descriptions',
                  xaxis=dict(title='Proposal Status'), yaxis=dict(title='Average Character Count'))

# Show the grouped bar chart
fig.show()
fig.write_html("Importance of Clearly Defined Milestones in Grant Proposals.html")


In [7]:
import pandas as pd
import plotly.graph_objs as go

# Load the datasets
topics_df = pd.read_csv('stip_topics_data.csv')
detail_df = pd.read_csv('stip_detail_data_cleaned.csv')

# Convert "Topic ID" to string/object type in both datasets
topics_df['Topic ID'] = topics_df['Topic ID'].astype(str)
detail_df['Topic ID'] = detail_df['Topic ID'].astype(str)

# Merge datasets based on Topic ID
merged_df = pd.merge(topics_df, detail_df, on='Topic ID')

# Extract milestone descriptions from the "Description" column
merged_df['Has Milestone Descriptions'] = merged_df['Description'].str.contains('Milestone Descriptions')

# Count the number of successful and unsuccessful proposals with and without milestone descriptions
success_with_milestone = merged_df[(merged_df['Status'] == 'Passed') & (merged_df['Has Milestone Descriptions'])].shape[0]
success_without_milestone = merged_df[(merged_df['Status'] == 'Passed') & (~merged_df['Has Milestone Descriptions'])].shape[0]
unsuccess_with_milestone = merged_df[(merged_df['Status'] != 'Passed') & (merged_df['Has Milestone Descriptions'])].shape[0]
unsuccess_without_milestone = merged_df[(merged_df['Status'] != 'Passed') & (~merged_df['Has Milestone Descriptions'])].shape[0]

# Create pie chart data
labels = ['Successful with Milestones', 'Successful without Milestones', 'Unsuccessful with Milestones', 'Unsuccessful without Milestones']
values = [success_with_milestone, success_without_milestone, unsuccess_with_milestone, unsuccess_without_milestone]

# Create pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.3)])

# Update layout
fig.update_layout(title='Proportion of Proposals with and without Milestone Descriptions')

# Show the pie chart
fig.show()
fig.write_html(".html")


#### What is the lowest, highest, mean, median & mode ask for the grant amount?

In [10]:
import pandas as pd
import plotly.graph_objs as go

# Load the dataset
topics_df = pd.read_csv('stip_topics_data.csv')

# Convert "Requested Amount" column to numeric and handle ranges
def convert_range_to_average(amount):
    if '-' in amount:
        range_values = amount.split('-')
        try:
            lower = float(range_values[0].replace(',', ''))
            upper = float(range_values[1].replace(',', ''))
            return (lower + upper) / 2
        except ValueError:
            return None
    else:
        try:
            return float(amount.replace(',', ''))
        except ValueError:
            return None

# Apply the function to convert range to average
topics_df['Requested Amount'] = topics_df['Requested Amount'].apply(convert_range_to_average)

# Drop rows with missing or NaN values in the "Requested Amount" column
topics_df.dropna(subset=['Requested Amount'], inplace=True)

# Calculate the statistical measures
lowest_amount = topics_df['Requested Amount'].min()
highest_amount = topics_df['Requested Amount'].max()
mean_amount = topics_df['Requested Amount'].mean()
median_amount = topics_df['Requested Amount'].median()
mode_amount = topics_df['Requested Amount'].mode().values[0]

# Define data for the grouped bar chart
measures = ['Lowest', 'Highest', 'Mean', 'Median', 'Mode']
values = [lowest_amount, highest_amount, mean_amount, median_amount, mode_amount]

# Create trace for the bar chart
trace = go.Bar(
    x=measures,
    y=values,
    marker=dict(color=['blue', 'orange', 'green', 'red', 'purple']),
)

# Create layout
layout = go.Layout(
    title='Grant Amount Statistics',
    xaxis=dict(title='Statistical Measure'),
    yaxis=dict(title='Grant Amount'),
)

# Create figure and plot
fig = go.Figure(data=[trace], layout=layout)
fig.show()
fig.write_html("Distribution of Grant Amounts in STIP Proposals.html")


#### Do certain types or styles of proposal titles correlate with higher success rates?

In [11]:
import pandas as pd
import plotly.graph_objects as go

# Load the dataset
topics_df = pd.read_csv('stip_topics_data.csv')

# Filter successful and unsuccessful proposals
successful_proposals = topics_df[topics_df['Status'] == 'Passed']
unsuccessful_proposals = topics_df[topics_df['Status'] != 'Passed']

# Preprocess titles: Remove whitespace and convert to lowercase
successful_proposals['Title'] = successful_proposals['Title'].str.strip().str.lower()
unsuccessful_proposals['Title'] = unsuccessful_proposals['Title'].str.strip().str.lower()

# Analyze title length
successful_proposals['Title Length'] = successful_proposals['Title'].apply(len)
unsuccessful_proposals['Title Length'] = unsuccessful_proposals['Title'].apply(len)

# Create histogram for successful and unsuccessful proposals
fig = go.Figure()

fig.add_trace(go.Histogram(
    x=successful_proposals['Title Length'],
    name='Successful',
    marker_color='blue',
    opacity=0.7
))

fig.add_trace(go.Histogram(
    x=unsuccessful_proposals['Title Length'],
    name='Unsuccessful',
    marker_color='red',
    opacity=0.7
))

# Update layout
fig.update_layout(
    title='Distribution of Proposal Title Lengths',
    xaxis_title='Title Length',
    yaxis_title='Frequency',
    barmode='overlay'
)

# Add legend
fig.update_traces(overwrite=True, showlegend=True)

# Show plot
fig.show()
fig.write_html("Correlation Between Proposal Title Length and Success Rates.html")




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

#### Do proposals with a higher number of views tend to have a higher success rate?

In [12]:
import pandas as pd
import plotly.express as px

# Load the dataset
topics_df = pd.read_csv('stip_topics_data.csv')

# Filter successful and unsuccessful proposals
successful_proposals = topics_df[topics_df['Status'] == 'Passed']
unsuccessful_proposals = topics_df[topics_df['Status'] != 'Passed']

# Plotting
fig = px.scatter(topics_df, x='Views', color='Status', hover_data=['Title'], title='Relationship between Views and Success Rate')
fig.update_layout(xaxis_title='Number of Views', yaxis_title='Proposal Status')
fig.show()
fig.write_html("Relationship Between Proposal Views and Success Rates.html")


#### Do proposals with a higher number of likes tend to have a higher success rate?

In [13]:
import pandas as pd
import plotly.express as px

# Load the dataset
topics_df = pd.read_csv('stip_topics_data.csv')

# Filter successful and unsuccessful proposals
successful_proposals = topics_df[topics_df['Status'] == 'Passed']
unsuccessful_proposals = topics_df[topics_df['Status'] != 'Passed']

# Plotting
fig = px.scatter(topics_df, x='Like Count', color='Status', hover_data=['Title'], title='Relationship between Likes and Success Rate')
fig.update_layout(xaxis_title='Number of Likes', yaxis_title='Proposal Status')
fig.show()
fig.write_html("Relationship Between Proposal Likes and Success Rates.html")
