In [1]:
import json
import pandas as pd
import numpy as np
from collections import defaultdict
from pathlib import Path
import glob
import os

class EnhancedTrendAnalyzer:
    def __init__(self, data_folder="openalex_topics_results"):
        self.data_folder = Path(data_folder)
        self.field_cache = {}  # Cache for field names
        
    def load_and_combine_data(self):
        """Load and combine data from all raw article files"""
        all_papers = []
        pattern = os.path.join(self.data_folder, "highly_cited_articles_202*.json")
        files = glob.glob(pattern)
        
        print(f"Found {len(files)} data files")
        for file_path in files:
            with open(file_path, 'r', encoding='utf-8') as f:
                papers = json.load(f)
                all_papers.extend(papers)
                
        print(f"Loaded total of {len(all_papers)} papers")
        return all_papers
    
    def get_field_info(self, topic):
        """Extract field information from topic, with caching"""
        topic_id = topic['id']
        if topic_id in self.field_cache:
            return self.field_cache[topic_id]
        
        # Extract field from topic hierarchy
        parts = topic_id.split('/')
        field_id = parts[0]
        
        # Get display name for the field
        field_name = topic['display_name'].split(' › ')[0] if ' › ' in topic['display_name'] else topic['display_name']
        
        field_info = {
            'id': field_id,
            'name': field_name,
            'display': f"{field_name} ({field_id})"
        }
        
        self.field_cache[topic_id] = field_info
        return field_info
    
    def analyze_monthly_trends(self, papers, min_score=0.3):
        """Analyze cooperation patterns between topics and fields"""
        months = ['2023-01', '2023-02', '2023-03', '2023-04', '2023-05', '2023-06',
                 '2023-07', '2023-08', '2023-09', '2023-10', '2023-11', '2023-12',
                 '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06',
                 '2024-07', '2024-08', '2024-09', '2024-10', '2024-11', '2024-12']
        
        # Initialize data structures
        monthly_topic_cooperations = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
        monthly_field_cooperations = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
        monthly_topic_counts = defaultdict(lambda: defaultdict(int))
        monthly_field_counts = defaultdict(lambda: defaultdict(int))
        monthly_paper_counts = defaultdict(int)
        
        papers_per_month = len(papers) // len(months)
        
        for month_idx, month in enumerate(months):
            start_idx = month_idx * papers_per_month
            end_idx = start_idx + papers_per_month if month_idx < len(months) - 1 else len(papers)
            month_papers = papers[start_idx:end_idx]
            
            print(f"Processing {month}: {len(month_papers)} papers")
            monthly_paper_counts[month] = len(month_papers)
            
            for paper in month_papers:
                topics = paper.get("topics", [])
                relevant_topics = [
                    t for t in topics 
                    if t.get("score", 0) >= min_score and "display_name" in t and "id" in t
                ]
                
                # Group topics by field
                field_topics = defaultdict(list)
                for topic in relevant_topics:
                    field_info = self.get_field_info(topic)
                    field_topics[field_info['display']].append({
                        'topic_display': f"{topic['display_name']} ({topic['id']})",
                        'score': topic['score']
                    })
                    
                    # Update counts
                    topic_display = f"{topic['display_name']} ({topic['id']})"
                    monthly_topic_counts[month][topic_display] += 1
                    monthly_field_counts[month][field_info['display']] += 1
                
                # Analyze topic-level cooperations
                for i, topic1 in enumerate(relevant_topics[:-1]):
                    for topic2 in relevant_topics[i+1:]:
                        topic1_display = f"{topic1['display_name']} ({topic1['id']})"
                        topic2_display = f"{topic2['display_name']} ({topic2['id']})"
                        topic_pair = sorted([topic1_display, topic2_display])
                        weight = topic1["score"] * topic2["score"]
                        monthly_topic_cooperations[month][topic_pair[0]][topic_pair[1]] += weight
                
                # Analyze field-level cooperations
                fields = list(field_topics.keys())
                for i, field1 in enumerate(fields[:-1]):
                    for field2 in fields[i+1:]:
                        # Calculate aggregate weight between fields
                        field_weight = sum(
                            t1["score"] * t2["score"]
                            for t1 in field_topics[field1]
                            for t2 in field_topics[field2]
                        )
                        
                        sorted_fields = sorted([field1, field2])
                        monthly_field_cooperations[month][sorted_fields[0]][sorted_fields[1]] += field_weight
        
        return {
            'topic_cooperations': monthly_topic_cooperations,
            'field_cooperations': monthly_field_cooperations,
            'topic_counts': monthly_topic_counts,
            'field_counts': monthly_field_counts,
            'paper_counts': monthly_paper_counts
        }

    def calculate_monthly_metrics(self, trend_data, level='topic'):
        """Calculate metrics for each month at either topic or field level"""
        monthly_metrics = {}
        cooperations = trend_data[f'{level}_cooperations']
        counts = trend_data[f'{level}_counts']
        
        for month in sorted(cooperations.keys()):
            metrics_list = []
            month_cooperations = cooperations[month]
            month_counts = counts[month]
            
            for item1, targets in month_cooperations.items():
                for item2, weight in targets.items():
                    # Calculate normalized weight
                    total_possible = min(
                        month_counts[item1],
                        month_counts[item2]
                    )
                    
                    if total_possible > 0:
                        normalized_weight = weight / total_possible
                        
                        metrics_list.append({
                            f'{level}1': item1,
                            f'{level}2': item2,
                            'weight': weight,
                            'normalized_weight': normalized_weight,
                            f'{level}1_papers': month_counts[item1],
                            f'{level}2_papers': month_counts[item2]
                        })
            
            monthly_metrics[month] = metrics_list
            print(f"Calculated {level}-level metrics for {month}: {len(metrics_list)} pairs")
            
        return monthly_metrics
    
    def analyze_trends(self, monthly_metrics, level='topic'):
        """Convert monthly metrics into trend data"""
        pairs = set()
        for month_metrics in monthly_metrics.values():
            for metric in month_metrics:
                pairs.add((metric[f'{level}1'], metric[f'{level}2']))
        
        trend_data = {}
        print(f"\nAnalyzing {level}-level trends for {len(pairs)} pairs")
        
        for pair in pairs:
            item1, item2 = pair
            trend = []
            
            for month in sorted(monthly_metrics.keys()):
                month_data = next(
                    (m for m in monthly_metrics[month] 
                     if (m[f'{level}1'] == item1 and m[f'{level}2'] == item2) or
                        (m[f'{level}1'] == item2 and m[f'{level}2'] == item1)),
                    None
                )
                
                if month_data:
                    trend.append({
                        'month': month,
                        'weight': month_data['weight'],
                        'normalized_weight': month_data['normalized_weight']
                    })
            
            if len(trend) > 1:
                trend_data[pair] = {
                    'trend': trend,
                    'items': pair
                }
        
        print(f"Found {len(trend_data)} {level}-level pairs with valid trends")
        return trend_data
    
    def identify_patterns(self, trend_data, level='topic'):
        """Identify different patterns in the trends"""
        patterns = {
            'rising_stars': [],
            'stable_strong': [],
            'emerging': [],
        }
        
        for pair, data in trend_data.items():
            trend = data['trend']
            weights = [t['normalized_weight'] for t in trend]
            
            if len(weights) < 2:
                continue
                
            # Calculate metrics
            growth_rate = (weights[-1] - weights[0]) / weights[0] if weights[0] > 0 else 0
            avg_weight = np.mean(weights)
            volatility = np.std(weights) / avg_weight if avg_weight > 0 else 0
            
            # Calculate momentum (using more relaxed thresholds for fields)
            if len(weights) >= 3:
                first_half = weights[:len(weights)//2]
                second_half = weights[len(weights)//2:]
                first_growth = np.mean(np.diff(first_half)) if len(first_half) > 1 else 0
                second_growth = np.mean(np.diff(second_half)) if len(second_half) > 1 else 0
                momentum = second_growth - first_growth
            else:
                momentum = 0
            
            metrics = {
                'growth_rate': growth_rate,
                'momentum': momentum,
                'average_weight': avg_weight,
                'volatility': volatility
            }
            
            # Use different thresholds for fields vs topics
            if level == 'field':
                growth_threshold = 0.05  # More relaxed for fields
                momentum_threshold = 0
                avg_weight_threshold = 0.2
                volatility_threshold = 0.3
            else:
                growth_threshold = 0.1
                momentum_threshold = 0
                avg_weight_threshold = 0.3
                volatility_threshold = 0.2
            
            # Classify the trend
            if growth_rate > growth_threshold and momentum > momentum_threshold:
                patterns['rising_stars'].append({
                    'items': pair,
                    'metrics': metrics,
                    'trend': trend
                })
            elif avg_weight > avg_weight_threshold and volatility < volatility_threshold:
                patterns['stable_strong'].append({
                    'items': pair,
                    'metrics': metrics,
                    'trend': trend
                })
            elif growth_rate > 0 and momentum > momentum_threshold:
                patterns['emerging'].append({
                    'items': pair,
                    'metrics': metrics,
                    'trend': trend
                })
        
        # Sort patterns
        for category in patterns:
            patterns[category].sort(
                key=lambda x: (
                    x['metrics']['growth_rate'] 
                    if category != 'stable_strong' 
                    else x['metrics']['average_weight']
                ),
                reverse=True
            )
        
        return patterns

def print_trend_report(topic_patterns, field_patterns, monthly_paper_counts):
    """Write a plain-text summary of topic- and field-level patterns to stdout.

    Args:
        topic_patterns: Result of ``identify_patterns`` for topics; must have
            the keys ``'rising_stars'``, ``'stable_strong'`` and ``'emerging'``.
        field_patterns: Same structure for fields.
        monthly_paper_counts: month -> number of papers; printed in sorted
            month order.

    The report lists the monthly paper distribution, the size of each bucket
    for both levels, and the metrics of up to 25 rising-star pairs per level.
    """
    print("\n=== Research Cooperation Trend Analysis ===\n")

    print("Monthly Paper Distribution:")
    for month, count in sorted(monthly_paper_counts.items()):
        print(f"{month}: {count} papers")

    sections = (("Topic", topic_patterns), ("Field", field_patterns))

    # Bucket sizes, topics first and then fields
    for title, patterns in sections:
        print(f"\n=== {title}-Level Patterns ===")
        print(f"Rising Stars: {len(patterns['rising_stars'])}")
        print(f"Stable Strong: {len(patterns['stable_strong'])}")
        print(f"Emerging: {len(patterns['emerging'])}")

    # Detail listing of the leading rising stars, again topics then fields
    for title, patterns in sections:
        leaders = patterns['rising_stars']
        if not leaders:
            continue
        print(f"\nTop Rising Star {title} Cooperations:")
        for entry in leaders[:25]:
            print(f"\n{entry['items'][0]} × {entry['items'][1]}")
            stats = entry['metrics']
            print(f"  Growth Rate: {stats['growth_rate']:.2f}")
            print(f"  Momentum: {stats['momentum']:.2f}")
            print(f"  Average Weight: {stats['average_weight']:.2f}")
            print(f"  Volatility: {stats['volatility']:.2f}")

In [2]:
import networkx as nx
import community.community_louvain as community_louvain
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
from collections import defaultdict
import seaborn as sns

class ResearchNetworkVisualizer:
    def __init__(self, monthly_metrics, patterns):
        """Keep references to the data to draw and build the colour palette.

        Args:
            monthly_metrics: month -> list of metric rows; read by
                ``create_network`` and ``create_animation``.
            patterns: bucket name -> list of pattern entries; read by
                ``visualize_growth_network``.
        """
        self.patterns = patterns
        self.monthly_metrics = monthly_metrics
        # 20 "husl" colours; get_node_colors indexes them modulo the palette length
        self.color_palette = sns.color_palette("husl", 20)
        
    def create_network(self, month, min_weight=0.1):
        """Build an undirected graph of one month's field cooperations.

        Args:
            month: Key into ``self.monthly_metrics``.
            min_weight: Rows whose ``normalized_weight`` is below this value
                are skipped (default: 0.1).

        Returns:
            networkx.Graph: One node per field that takes part in a retained
            row (attribute ``papers`` taken from the first row that mentions
            it) and one edge per retained row with a float ``weight``.
        """
        graph = nx.Graph()

        for row in self.monthly_metrics[month]:
            source, target = row['field1'], row['field2']
            strength = row['normalized_weight']

            if not strength >= min_weight:
                continue  # too weak to show

            # Register each endpoint once; the first paper count seen is kept.
            for slot, name in (('field1', source), ('field2', target)):
                if not graph.has_node(name):
                    graph.add_node(name, papers=row[f'{slot}_papers'])

            graph.add_edge(source, target, weight=float(strength))

        return graph
    
    def get_node_colors(self, G, partition=None):
        """Get node colors based on community detection or node strength"""
        if partition:
            # Colors based on communities
            return [self.color_palette[partition[node] % len(self.color_palette)] 
                    for node in G.nodes()]
        else:
            # Colors based on node strength (sum of edge weights)
            strengths = []
            for node in G.nodes():
                # Properly sum edge weights
                strength = sum(float(G[node][neighbor]['weight']) 
                             for neighbor in G[node])
                strengths.append(strength)
            
            if not strengths:  # Handle empty graph
                return []
            
            max_strength = max(strengths) if strengths else 1
            normalized_strengths = np.array(strengths) / max_strength if max_strength > 0 else np.zeros_like(strengths)
            return plt.cm.viridis(normalized_strengths)
    
    def get_node_sizes(self, G):
        """Calculate node sizes based on number of papers"""
        paper_counts = [G.nodes[node].get('papers', 0) for node in G.nodes()]
        if not paper_counts:  # Handle empty graph
            return []
            
        # Scale node sizes between 100 and 2000
        min_size, max_size = 100, 2000
        if max(paper_counts) == min(paper_counts):
            return [min_size] * len(paper_counts)
        normalized_sizes = [(count - min(paper_counts)) / (max(paper_counts) - min(paper_counts))
                          for count in paper_counts]
        return [min_size + (max_size - min_size) * size for size in normalized_sizes]
    
    def visualize_network(self, month, figsize=(15, 10), with_communities=True, min_weight=0.1, save_path=None):
        """Draw a static picture of one month's field network.

        Args:
            month: Key into ``self.monthly_metrics``.
            figsize: Figure size passed to ``plt.figure``.
            with_communities: When true, nodes are coloured by the partition
                returned from ``community_louvain.best_partition``; otherwise
                by node strength.
            min_weight: Forwarded to ``create_network``.
            save_path: If given, the figure is saved there (300 dpi) and
                closed; otherwise it is shown.

        Prints a message and returns without drawing when the graph has no
        nodes. Only nodes larger than the mean node size are labelled.
        """
        graph = self.create_network(month, min_weight=min_weight)
        graph.remove_nodes_from(list(nx.isolates(graph)))

        if len(graph.nodes()) == 0:
            print(f"No connections found for {month}")
            return

        plt.figure(figsize=figsize)

        # Community detection is optional; None falls back to strength colours
        communities = community_louvain.best_partition(graph) if with_communities else None

        colours = self.get_node_colors(graph, communities)
        sizes = self.get_node_sizes(graph)
        if len(colours) == 0 or len(sizes) == 0:
            print(f"No valid nodes to display for {month}")
            return

        layout = nx.spring_layout(graph, k=1/np.sqrt(len(graph.nodes())), iterations=50)

        # Edge width is proportional to the normalized weight
        widths = [attrs['weight'] * 5 for _, _, attrs in graph.edges(data=True)]
        nx.draw_networkx_edges(graph, layout, alpha=0.2, width=widths)
        nx.draw_networkx_nodes(graph, layout, node_color=colours,
                               node_size=sizes, alpha=0.7)

        # Label only the above-average nodes to keep the plot readable
        mean_size = np.mean(sizes)
        labels = {
            node: node
            for node, size in zip(graph.nodes(), sizes)
            if size > mean_size
        }
        nx.draw_networkx_labels(graph, layout, labels, font_size=6)

        plt.title(f'Research Field Network - {month}')
        plt.axis('off')

        if not save_path:
            plt.show()
            return
        plt.savefig(save_path, bbox_inches='tight', dpi=300)
        plt.close()
    
    def visualize_growth_network(self, min_growth_rate=0.05, figsize=(15, 10), save_path=None):
        """Draw the pairs classified as rising stars or emerging.

        Args:
            min_growth_rate: Pairs whose ``growth_rate`` metric is below this
                value are left out.
            figsize: Figure size passed to ``plt.figure``.
            save_path: If given, the figure is saved there (300 dpi) and
                closed; otherwise it is shown.

        Edges are red for ``rising_stars`` and blue for ``emerging``, with a
        width proportional to the growth rate. Prints a message and returns
        without drawing when no pair qualifies.
        """
        graph = nx.Graph()

        for category in ('rising_stars', 'emerging'):
            for entry in self.patterns[category]:
                left, right = entry['items']
                growth = entry['metrics']['growth_rate']
                if not growth >= min_growth_rate:
                    continue

                # add_node without attributes leaves an existing node untouched
                graph.add_node(left)
                graph.add_node(right)
                graph.add_edge(left, right, weight=float(growth), type=category)

        if len(graph.nodes()) == 0:
            print(f"No connections found with growth rate >= {min_growth_rate}")
            return

        plt.figure(figsize=figsize)
        layout = nx.spring_layout(graph, k=1/np.sqrt(len(graph.nodes())), iterations=50)

        # One draw call per pattern type so each gets its own colour
        edge_colors = {'rising_stars': 'red', 'emerging': 'blue'}
        for category, colour in edge_colors.items():
            selected = [
                (u, v)
                for u, v, attrs in graph.edges(data=True)
                if attrs['type'] == category
            ]
            if not selected:
                continue
            widths = [graph[u][v]['weight'] * 5 for u, v in selected]
            nx.draw_networkx_edges(graph, layout, edgelist=selected,
                                   edge_color=colour,
                                   width=widths, alpha=0.6)

        nx.draw_networkx_nodes(graph, layout, node_color='lightblue',
                               node_size=1000, alpha=0.7)

        # Every node is labelled with its own name
        nx.draw_networkx_labels(graph, layout, {node: node for node in graph.nodes()}, font_size=6)

        plt.title('Research Field Growth Network')
        plt.axis('off')

        legend_handles = [
            plt.Line2D([0], [0], color=colour, label=category)
            for category, colour in edge_colors.items()
        ]
        plt.legend(handles=legend_handles, loc='upper left', bbox_to_anchor=(1, 1))

        if not save_path:
            plt.show()
            return
        plt.savefig(save_path, bbox_inches='tight', dpi=300)
        plt.close()
    
    def create_animation(self, save_path, figsize=(15, 10)):
        """Create an animation showing network evolution over time"""
        fig, ax = plt.subplots(figsize=figsize)
        
        # Get all months
        months = sorted(self.monthly_metrics.keys())
        if not months:
            print("No monthly data available for animation")
            return
        
        # Create initial network to establish consistent layout
        G_initial = self.create_network(months[0])
        pos = nx.spring_layout(G_initial, k=1/np.sqrt(len(G_initial.nodes())), iterations=50)
        
        # Store frames
        frames = []
        
        for month in months:
            # Clear the current frame
            ax.clear()
            
            G = self.create_network(month)
            G.remove_nodes_from(list(nx.isolates(G)))
            
            if len(G.nodes()) > 0:
                # Use consistent layout for nodes that exist in both networks
                pos_frame = {node: pos[node] for node in G.nodes() if node in pos}
                new_nodes = set(G.nodes()) - set(pos_frame.keys())
                if new_nodes:
                    pos_new = nx.spring_layout(G.subgraph(new_nodes), k=1/np.sqrt(len(G.nodes())))
                    pos_frame.update(pos_new)
                
                # Draw the network
                node_colors = self.get_node_colors(G)
                node_sizes = self.get_node_sizes(G)
                
                if len(node_colors) > 0 and len(node_sizes) > 0:  # Only draw if we have valid data
                    edge_weights = [G[u][v]['weight'] * 5 for u, v in G.edges()]
                    nx.draw_networkx_edges(G, pos_frame, alpha=0.2, width=edge_weights)
                    nx.draw_networkx_nodes(G, pos_frame, node_color=node_colors,
                                         node_size=node_sizes, alpha=0.7)
                    
                    # Add labels for larger nodes
                    large_nodes = [node for node, size in zip(G.nodes(), node_sizes)
                                 if size > np.mean(node_sizes)]
                    labels = {node: node for node in large_nodes}
                    nx.draw_networkx_labels(G, pos_frame, labels, font_size=6)
            
            ax.set_title(f'Research Field Network - {month}')
            ax.axis('off')
            
            # Save the frame
            fig.canvas.draw()
            # Get the RGBA buffer from the figure
            w, h = fig.canvas.get_width_height()
            buf = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
            # Reshape it to a proper image array
            buf = buf.reshape((h, w, 4))
            # Convert RGBA to RGB
            frame = buf[:, :, :3]
            frames.append(frame)
        
        if frames:  # Only save if we have frames
            # Save frames as GIF
            import imageio
            imageio.mimsave(save_path, frames, fps=1)
        
        plt.close()

# Example usage remains the same

In [3]:
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import defaultdict

def prepare_visualization_data(analyzer, field_metrics, field_patterns):
    """Reshape analyzer output into the structures the visualizer reads.

    Args:
        analyzer: Not used by this function; kept for call compatibility.
        field_metrics: month -> list of rows with the keys ``field1``,
            ``field2``, ``weight``, ``normalized_weight``, ``field1_papers``
            and ``field2_papers``.
        field_patterns: Mapping with the keys ``rising_stars``,
            ``stable_strong`` and ``emerging``; each entry has ``items``,
            ``metrics`` and ``trend``.

    Returns:
        tuple: ``(monthly_metrics, formatted_patterns)`` -- copies of the
        inputs with cleaned field names. ``monthly_metrics`` is a
        ``defaultdict(list)``, so an unknown month yields an empty list.
    """
    url_suffix = " (https:)"

    def clean_field_name(name):
        """Strip a trailing ' (https:)' marker from a field label."""
        # Only the suffix is removed. Deleting every ")" would mangle names
        # that contain parentheses of their own, e.g. "Optics (OCT) (https:)".
        if name.endswith(url_suffix):
            return name[:-len(url_suffix)]
        return name

    # Format monthly metrics into the expected structure
    monthly_metrics = defaultdict(list)

    for month, metrics in field_metrics.items():
        for metric in metrics:
            monthly_metrics[month].append({
                'field1': clean_field_name(metric['field1']),
                'field2': clean_field_name(metric['field2']),
                'weight': metric['weight'],
                'normalized_weight': metric['normalized_weight'],
                'field1_papers': metric['field1_papers'],
                'field2_papers': metric['field2_papers']
            })

    # Format patterns into the expected structure
    formatted_patterns = {
        category: [
            {
                'items': tuple(clean_field_name(item) for item in pair['items']),
                'metrics': pair['metrics'],
                'trend': pair['trend']
            }
            for pair in field_patterns[category]
        ]
        for category in ('rising_stars', 'stable_strong', 'emerging')
    }

    return monthly_metrics, formatted_patterns

def run_visualizations(analyzer, field_metrics, field_patterns):
    """Produce every visualization for the field-level results.

    Args:
        analyzer: Forwarded to ``prepare_visualization_data``.
        field_metrics: Field-level monthly metrics.
        field_patterns: Field-level pattern buckets.

    Writes ``network_2023_01.png``, ``network_2024_12.png``,
    ``growth_network.png`` and ``network_evolution.gif`` to the working
    directory and prints a progress line before each step.
    """
    monthly_metrics, patterns = prepare_visualization_data(analyzer, field_metrics, field_patterns)
    visualizer = ResearchNetworkVisualizer(monthly_metrics, patterns)

    # Static snapshots of the first and last month, drawn with identical settings
    snapshots = (
        ('2023-01', 'network_2023_01.png'),
        ('2024-12', 'network_2024_12.png'),
    )
    for month, target in snapshots:
        print(f"Creating network visualization for {month}...")
        visualizer.visualize_network(month,
                                     figsize=(8, 6),
                                     with_communities=True,
                                     min_weight=0.5,
                                     save_path=target)

    print("Creating growth network visualization...")
    visualizer.visualize_growth_network(min_growth_rate=0.025,
                                        figsize=(8, 6),
                                        save_path='growth_network.png')

    print("Creating network evolution animation...")
    visualizer.create_animation('network_evolution.gif')

    print("All visualizations completed!")

# Example usage:
"""
# After running your analysis:
analyzer = EnhancedTrendAnalyzer()
papers = analyzer.load_and_combine_data()

# Get monthly trends data
trend_data = analyzer.analyze_monthly_trends(papers)

# Calculate metrics for fields
field_metrics = analyzer.calculate_monthly_metrics(trend_data, level='field')

# Analyze field-level trends
field_trends = analyzer.analyze_trends(field_metrics, level='field')

# Identify field-level patterns
field_patterns = analyzer.identify_patterns(field_trends, level='field')

# Run visualizations
run_visualizations(analyzer, field_metrics, field_patterns)
"""

"\n# After running your analysis:\nanalyzer = EnhancedTrendAnalyzer()\npapers = analyzer.load_and_combine_data()\n\n# Get monthly trends data\ntrend_data = analyzer.analyze_monthly_trends(papers)\n\n# Calculate metrics for fields\nfield_metrics = analyzer.calculate_monthly_metrics(trend_data, level='field')\n\n# Analyze field-level trends\nfield_trends = analyzer.analyze_trends(field_metrics, level='field')\n\n# Identify field-level patterns\nfield_patterns = analyzer.identify_patterns(field_trends, level='field')\n\n# Run visualizations\nrun_visualizations(analyzer, field_metrics, field_patterns)\n"

In [4]:
# Build the analyzer and load every raw article file from its data folder.
analyzer = EnhancedTrendAnalyzer()
papers = analyzer.load_and_combine_data()

# Get monthly trends data (co-occurrence weights and counts per month)
trend_data = analyzer.analyze_monthly_trends(papers)

# Calculate metrics for both levels
topic_metrics = analyzer.calculate_monthly_metrics(trend_data, level='topic')
field_metrics = analyzer.calculate_monthly_metrics(trend_data, level='field')

# Analyze trends for both levels
topic_trends = analyzer.analyze_trends(topic_metrics, level='topic')
field_trends = analyzer.analyze_trends(field_metrics, level='field')

# Identify patterns
topic_patterns = analyzer.identify_patterns(topic_trends, level='topic')
field_patterns = analyzer.identify_patterns(field_trends, level='field')

# Print comprehensive report
print_trend_report(topic_patterns, field_patterns, trend_data['paper_counts'])

# Run visualizations (field level only; reuses the results computed above)
run_visualizations(analyzer, field_metrics, field_patterns)

Found 28 data files
Loaded total of 405813 papers
Processing 2023-01: 16908 papers
Processing 2023-02: 16908 papers
Processing 2023-03: 16908 papers
Processing 2023-04: 16908 papers
Processing 2023-05: 16908 papers
Processing 2023-06: 16908 papers
Processing 2023-07: 16908 papers
Processing 2023-08: 16908 papers
Processing 2023-09: 16908 papers
Processing 2023-10: 16908 papers
Processing 2023-11: 16908 papers
Processing 2023-12: 16908 papers
Processing 2024-01: 16908 papers
Processing 2024-02: 16908 papers
Processing 2024-03: 16908 papers
Processing 2024-04: 16908 papers
Processing 2024-05: 16908 papers
Processing 2024-06: 16908 papers
Processing 2024-07: 16908 papers
Processing 2024-08: 16908 papers
Processing 2024-09: 16908 papers
Processing 2024-10: 16908 papers
Processing 2024-11: 16908 papers
Processing 2024-12: 16929 papers
Calculated topic-level metrics for 2023-01: 8126 pairs
Calculated topic-level metrics for 2023-02: 10706 pairs
Calculated topic-level metrics for 2023-03: 130

In [None]:
# NOTE(review): this cell repeats the pipeline of the previous cell (minus the
# report and the visualizations) and relies on `analyzer` created there, so it
# fails on a fresh kernel if run on its own -- confirm whether it is still needed.
papers = analyzer.load_and_combine_data()

# Get monthly trends data
trend_data = analyzer.analyze_monthly_trends(papers)

# Calculate metrics for both levels
topic_metrics = analyzer.calculate_monthly_metrics(trend_data, level='topic')
field_metrics = analyzer.calculate_monthly_metrics(trend_data, level='field')

# Analyze trends for both levels
topic_trends = analyzer.analyze_trends(topic_metrics, level='topic')
field_trends = analyzer.analyze_trends(field_metrics, level='field')

# Identify patterns
topic_patterns = analyzer.identify_patterns(topic_trends, level='topic')
field_patterns = analyzer.identify_patterns(field_trends, level='field')


In [None]:
import pickle

# Save the topic-level monthly metrics. The original dumped `company1`, a name
# that is never defined in this notebook; `topic_metrics` matches the target
# file name. The context manager closes the file handle once the dump is done.
with open("topic_metrics.pickle", "wb") as metrics_file:
    pickle.dump(topic_metrics, metrics_file)