# Analyse

In [None]:
# Programming language trends: how many repositories use each language
language_counts = df['language'].value_counts()
# Slice off the first five rows of the counts (the five most used languages)
top_languages = language_counts.iloc[:5]
print("Programming Language Trends:")
top_languages

In [None]:
# Project statistics: mean of the 'stars' column and maximum of the 'forks' column
average_stars, max_forks = df['stars'].mean(), df['forks'].max()

print("\nProject Statistics:")
# Emit one "label value" line per statistic
for label, value in (("Average Stars:", average_stars), ("Max Forks:", max_forks)):
    print(label, value)

In [None]:
# Repository creation trends over time.
# Adds a 'year_month' column to df holding the monthly period of each 'created_at' value.
df['year_month'] = df['created_at'].dt.to_period('M')
# Number of rows (repositories) that fall into each month
repos_per_month = df.groupby('year_month').size()
# Flatten into a two-column frame: 'year_month' and 'count'
creation_trends = repos_per_month.reset_index(name='count')
print("Repository Creation Trends Over Time:")
creation_trends

In [None]:
# Store the 'year_month' values as strings; they are the x values of the plot below.
# Note: this overwrites the column in creation_trends in place.
creation_trends['year_month'] = creation_trends['year_month'].astype(str)

# Line plot of repositories created per month, one marker per month
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(creation_trends['year_month'], creation_trends['count'], marker='o')
ax.set(
    xlabel='Year-Month',
    ylabel='Number of Repositories Created',
    title='Repository Creation Trends Over Time',
)
# Tilt the month labels on the x-axis by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Popular repositories by stars: the ten rows with the highest 'stars' value,
# restricted to the name, star count and URL columns
popular_repositories_by_stars = (
    df[['name', 'stars', 'url']]
    .nlargest(n=10, columns='stars')
)
print("\nPopular Repositories by Stars:")
popular_repositories_by_stars

In [None]:
# Vertical bar chart of the top-starred repositories computed in the previous cell
repo_names = popular_repositories_by_stars['name']
star_counts = popular_repositories_by_stars['stars']

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(repo_names, star_counts)
ax.set_xlabel('Repository')
ax.set_ylabel('Number of Stars')
ax.set_title('Top 10 Starred Repositories')
# Tilt the repository names on the x-axis by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Popular repositories by forks: the ten rows with the highest 'forks' value,
# then only the name, fork count and URL columns
popular_repositories_by_forks = (
    df
    .nlargest(n=10, columns='forks')
    .loc[:, ['name', 'forks', 'url']]
)
print("\nPopular Repositories by Forks:")
popular_repositories_by_forks

In [None]:
# Horizontal bar chart of the most-forked repositories computed in the previous cell
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(
    popular_repositories_by_forks['name'],
    popular_repositories_by_forks['forks'],
    color='lightgreen',
)
ax.set(
    xlabel='Number of Forks',
    ylabel='Repository Name',
    title='Popular Repositories by Forks',
)
# Flip the y-axis so the first row of the table is drawn at the top
ax.invert_yaxis()
plt.show()

In [None]:
# Programming language usage: repository count for every language in df
language_usage = df['language'].value_counts()
# Heading and table go out through one print call, each on its own line
print("\nProgramming Language Usage:", language_usage, sep="\n")

In [None]:
# Language distribution pie chart.
# Repository counts for the first ten languages of value_counts() only.
# Note: this rebinds `language_counts`, which an earlier cell set to the full counts.
language_counts = df['language'].value_counts().iloc[:10]

fig, ax = plt.subplots(figsize=(8, 8))
# One wedge per language, annotated with its percentage to one decimal place
ax.pie(language_counts, labels=language_counts.index, autopct='%1.1f%%')
ax.set_title('Top 10 Programming Languages')
plt.show()

In [None]:
# Average stars by language: mean 'stars' per language, ten highest means
avg_stars_by_language = (
    df.groupby('language')['stars']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(avg_stars_by_language.index, avg_stars_by_language)
ax.set(
    xlabel='Language',
    ylabel='Average Stars',
    title='Average Stars by Programming Language',
)
# Tilt the language names on the x-axis by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Average forks by language: mean 'forks' per language, ten highest means
mean_forks_per_language = df.groupby('language')['forks'].mean()
avg_forks_by_language = mean_forks_per_language.sort_values(ascending=False).head(10)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(avg_forks_by_language.index, avg_forks_by_language)
ax.set_xlabel('Language')
ax.set_ylabel('Average Forks')
ax.set_title('Average Forks by Programming Language')
# Tilt the language names on the x-axis by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Word cloud built from the repository descriptions
from wordcloud import WordCloud

# Drop missing descriptions, coerce the rest to str, and join them with single spaces
description_texts = df['description'].dropna().astype(str)
descriptions = ' '.join(description_texts)

# Configure the cloud (800x400, white background), then feed it the combined text
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(descriptions)

# Show the cloud as an image with the axes hidden
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Word Cloud of Descriptions')
plt.show()

In [None]:
# Top owners by repository count: first ten rows of the per-owner counts
top_owners = df['owner'].value_counts().head(10)

# Bar chart with one bar per owner
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(top_owners.index, top_owners.values, color='lightcoral')
ax.set(
    xlabel='Owner',
    ylabel='Repository Count',
    title='Top Owners by Repository Count',
)
# Tilt the owner names on the x-axis by 45 degrees
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
plt.show()

In [None]:
# Calculate the total open issues by owner.
# `owner_contribution` still holds the total for every owner.
owner_contribution = df.groupby('owner')['open_issues'].sum()

# Chart only the ten owners with the largest totals. Drawing one bar per owner
# for every owner in the data crowds the x-axis until the labels are unreadable.
# If there are fewer than ten owners, all of them are shown.
top_owner_contribution = owner_contribution.sort_values(ascending=False).head(10)

# Plot the owner contribution analysis
plt.figure(figsize=(10, 6))
plt.bar(top_owner_contribution.index, top_owner_contribution.values, color='lightgreen')
plt.xlabel('Owner')
plt.ylabel('Total Open Issues')
# The title states the restriction so the chart is not read as covering all owners
plt.title('Owner Contribution Analysis (Top 10 by Open Issues)')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

In [None]:
# Descriptive statistics of the 'open_issues' column, as returned by describe()
open_issues_stats = df['open_issues'].describe()

# Bar chart with one bar per statistic; all statistics share the same y-axis
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(open_issues_stats.index, open_issues_stats.values, color='lightsalmon')
ax.set(
    xlabel='Statistic',
    ylabel='Value',
    title='Descriptive Statistics of Open Issues',
)
ax.grid(True)
plt.show()