# GitHub Repository Analyzer - Interactive Demo

This notebook demonstrates how to use the GitHub Repository Analyzer interactively in Jupyter.

## Setup

Before running this notebook:
1. Install dependencies: `pip install -r requirements.txt`
2. Create a `.env` file containing your GitHub token as `GITHUB_TOKEN=<your-token>`
3. Install Jupyter and ipywidgets (ipywidgets is needed for the interactive demo in Example 5): `pip install jupyter ipywidgets`

In [None]:
# Import required libraries
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv

# Add parent directory to path
sys.path.append('..')

# Import the analyzer
from main import GitHubRepositoryAnalyzer

# Set up plotting
%matplotlib inline
plt.style.use('seaborn-v0_8')
sns.set_palette('viridis')

In [None]:
# Load environment variables and initialize analyzer
load_dotenv()
github_token = os.getenv('GITHUB_TOKEN')

# Every later cell uses `analyzer`. Stop here with an actionable message
# instead of letting the next cell fail with a NameError (or silently reuse
# an analyzer left over from an earlier run of this kernel).
if not github_token:
    raise RuntimeError("❌ Please set your GITHUB_TOKEN in a .env file")

print("✅ GitHub token loaded successfully")
analyzer = GitHubRepositoryAnalyzer(token=github_token)
print("✅ Analyzer initialized")

## Example 1: Basic Repository Search and Analysis

In [None]:
# Search for Python machine learning repositories
print("🔍 Searching for Python ML repositories...")

# `or []` keeps the len() call below valid should the search hand back None;
# the `if repositories:` check further down already treats the result as
# possibly falsy.
repositories = analyzer.search_repositories(
    query="language:python topic:machine-learning",
    max_repos=150,
    time_window="last 6 months"
) or []

print(f"📊 Found {len(repositories)} repositories")

# Display top repositories
if repositories:
    # Ten most-starred results, highest first
    top_repos = sorted(repositories, key=lambda x: x['stars'], reverse=True)[:10]

    df_top = pd.DataFrame([
        {
            'Repository': repo['full_name'],
            'Stars': repo['stars'],
            'Forks': repo['forks'],
            # dict.get's default only applies when the key is missing; `or`
            # also covers a key that is present but holds None, so the table
            # never shows a literal "None" language.
            'Language': repo.get('language') or 'Unknown'
        }
        for repo in top_repos
    ])

    print("\n🏆 Top 10 Repositories:")
    display(df_top)

In [None]:
# Perform comprehensive analysis
print("🔬 Performing analysis...")

analysis_results = analyzer.analyze_all(
    perform_clustering=True,
    predict_trends=True
)

print("✅ Analysis completed!")

# Display summary statistics, one "Title Cased Key: value" line per entry
summary = analysis_results.get('summary', {})
print("\n📈 Summary Statistics:")
for key, value in summary.items():
    label = key.replace('_', ' ').title()
    # bool is a subclass of int, so it must be excluded explicitly; otherwise
    # True/False would go through the ',' format and be printed as 1/0.
    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
    if is_number:
        print(f"  • {label}: {value:,}")  # thousands separators for numbers
    else:
        print(f"  • {label}: {value}")

## Example 2: Language Analysis and Visualization

In [None]:
# Per-language records produced by the analysis step
languages = analysis_results.get('languages', [])

if languages:
    df_languages = pd.DataFrame(languages)

    # Tabular view: first ten rows, restricted to the columns of interest
    print("💻 Programming Languages Analysis:")
    table_columns = ['language', 'count_stars', 'sum_stars', 'mean_stars', 'popularity_score']
    display(df_languages.head(10)[table_columns])

    # Both charts are drawn from the same ten rows and share tick positions
    top_10_langs = df_languages.head(10)
    tick_positions = range(len(top_10_langs))
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # (axes, column to plot, y-axis label, title) for each panel, left to right
    panels = [
        (ax1, 'count_stars', 'Repository Count', 'Repository Count by Language'),
        (ax2, 'mean_stars', 'Average Stars per Repository', 'Average Quality by Language'),
    ]
    for axis, column, y_label, panel_title in panels:
        axis.bar(tick_positions, top_10_langs[column])
        axis.set_xlabel('Programming Languages')
        axis.set_ylabel(y_label)
        axis.set_title(panel_title)
        axis.set_xticks(tick_positions)
        axis.set_xticklabels(top_10_langs['language'], rotation=45)

    plt.tight_layout()
    plt.show()

## Example 3: Topic Analysis

In [None]:
# Topic records produced by the analysis step
topics = analysis_results.get('topics', [])

if topics:
    df_topics = pd.DataFrame(topics)

    print("🏷️ Repository Topics Analysis:")
    display(df_topics.head(15))

    # Horizontal bar chart over the first twenty rows
    top_20_topics = df_topics.head(20)
    row_positions = range(len(top_20_topics))

    plt.figure(figsize=(12, 8))
    plt.barh(row_positions, top_20_topics['count'])
    plt.xlabel('Number of Repositories')
    plt.ylabel('Topics')
    plt.title('Top 20 Repository Topics')
    plt.yticks(row_positions, top_20_topics['topic'])
    # Flip the y-axis so the first row is drawn at the top
    plt.gca().invert_yaxis()

    # Write each row's percentage just past the end of its bar
    bar_values = zip(top_20_topics['count'], top_20_topics['percentage'])
    for row, (bar_length, share) in enumerate(bar_values):
        plt.text(bar_length + 0.5, row, f'{share:.1f}%', va='center')

    plt.tight_layout()
    plt.show()

## Example 4: Trend Predictions

In [None]:
# Per-language prediction records produced by the analysis step
predictions = analysis_results.get('predictions', {})

if predictions:
    print("📈 Trend Predictions:")

    # One table row per language, with display-friendly column names
    pred_data = [
        {
            'Language': language,
            'Current Score': pred['current_score'],
            'Growth Rate (%)': pred['growth_rate'],
            'Trend Direction': pred['trend_direction'],
            'Confidence (%)': pred['confidence']
        }
        for language, pred in predictions.items()
    ]

    # Largest growth rate first
    df_predictions = pd.DataFrame(pred_data).sort_values('Growth Rate (%)', ascending=False)

    display(df_predictions)

    # Horizontal bars: green for a growth rate above zero, red otherwise
    growth_rates = df_predictions['Growth Rate (%)']
    bar_positions = range(len(df_predictions))
    colors = ['green' if rate > 0 else 'red' for rate in growth_rates]

    plt.figure(figsize=(12, 6))
    plt.barh(bar_positions, growth_rates, color=colors, alpha=0.7)
    plt.xlabel('Predicted Growth Rate (%)')
    plt.ylabel('Programming Languages')
    plt.title('Language Growth Predictions')
    plt.yticks(bar_positions, df_predictions['Language'])
    plt.gca().invert_yaxis()
    # Thin vertical line at zero
    plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)

    plt.tight_layout()
    plt.show()

## Example 5: Interactive Time Window Analysis

In [None]:
# Create interactive widget for time window selection
import ipywidgets as widgets
from IPython.display import display, clear_output

# Choices offered by the time-window dropdown
time_windows = [
    "last 12 months", "last 6 months", "last month", "this month",
    "last two weeks", "last week", "today",
]

# Free-text search query, pre-filled with an example
query_widget = widgets.Text(
    description="Query:",
    value="language:javascript topic:react",
    style={'description_width': 'initial'},
)

# Dropdown over the time windows listed above
time_widget = widgets.Dropdown(
    description="Time Window:",
    options=time_windows,
    value="last 6 months",
    style={'description_width': 'initial'},
)

# Repository limit: 50 to 500 in steps of 50
max_repos_widget = widgets.IntSlider(
    description="Max Repos:",
    min=50, max=500, step=50, value=100,
    style={'description_width': 'initial'},
)

# Button that starts an analysis, and the area that receives its printed output
button = widgets.Button(description="Analyze", button_style='primary')
output = widgets.Output()

def analyze_button_click(b):
    """Handle a click on the Analyze button.

    Reads the query, time window and repository limit from the widgets,
    resets ``analyzer.repositories_data``, runs a search followed by an
    analysis with clustering and trend prediction switched off, and prints
    summary figures plus the five most-starred results inside ``output``.
    Any exception raised by the search or analysis is caught and printed
    instead of being propagated.

    Args:
        b: Argument supplied by the click callback; not used here.
    """
    with output:
        # Replace whatever an earlier click printed
        clear_output(wait=True)

        query = query_widget.value
        window = time_widget.value
        limit = max_repos_widget.value
        print(f"🔍 Analyzing: {query}")
        print(f"⏰ Time window: {window}")
        print(f"📊 Max repositories: {limit}")

        try:
            # Discard results held from any previous search
            analyzer.repositories_data = []

            found = analyzer.search_repositories(
                query=query,
                max_repos=limit,
                time_window=window
            )

            if not found:
                print("❌ No repositories found")
                return

            print(f"✅ Found {len(found)} repositories")

            # Quick pass: clustering and trend prediction are both disabled
            quick = analyzer.analyze_all(
                perform_clustering=False,
                predict_trends=False
            )
            stats = quick.get('summary', {})

            print(f"\n📈 Quick Stats:")
            total_stars = stats.get('total_stars', 0)
            print(f"  • Total Stars: {total_stars:,}")
            print(f"  • Average Stars: {total_stars / len(found):.1f}")
            print(f"  • Unique Languages: {stats.get('unique_languages', 0)}")

            # Five most-starred results, highest first
            ranked = sorted(found, key=lambda item: item['stars'], reverse=True)
            print(f"\n🏆 Top 5 Repositories:")
            for position, item in enumerate(ranked[:5], 1):
                print(f"  {position}. {item['full_name']} - ⭐ {item['stars']:,}")

        except Exception as err:
            print(f"❌ Error: {err}")

# Register the click handler on the Analyze button
button.on_click(analyze_button_click)

# Heading, then the controls in order, ending with the output area
print("🎛️ Interactive Repository Analysis")
controls = (query_widget, time_widget, max_repos_widget, button, output)
display(*controls)

## Example 6: Export Results

In [None]:
# Ask the analyzer for its insights report
insights = analyzer.generate_insights_report()

print("📝 Insights Report Generated:")

# Each section is printed as a heading followed by a numbered list; a section
# missing from the report prints only its heading.
report_sections = (
    ("\n🎯 Top Findings:", 'top_findings'),
    ("\n💡 Recommendations:", 'recommendations'),
)
for heading, section_key in report_sections:
    print(heading)
    for number, entry in enumerate(insights.get(section_key, []), 1):
        print(f"  {number}. {entry}")

# Write the data out and list what save_all_data reports back
print("\n💾 Saving analysis data...")
saved_files = analyzer.save_all_data(filename_prefix="notebook_analysis")

print("✅ Files saved:")
for kind, path in saved_files.items():
    print(f"  • {kind}: {path}")

## Next Steps

- Try different search queries and time windows
- Explore the saved CSV and JSON files
- Check out the Python script examples in the `examples/` directory
- Modify the analysis parameters for your specific research needs

Happy analyzing! 🚀