In [52]:
import pandas as pd
import os
import ast

In [None]:
class MarkdownCreatorProjects:
    """Fill a markdown template with rows of a CSV file and save the result.

    The template is chosen by ``type`` (see ``load_md_file``) and the data is
    read from ``data/csv/<data_file>.csv``. Placeholders in the template use
    the form ``{{name}}``; each one is replaced by the value of the matching
    CSV column (or of the column given in ``column_mapping``).
    """

    def __init__(self, type, data_file, name=None, column_mapping=None):
        """Load the template and the CSV data.

        Args:
            type: Template key; one of "Projekt", "Art", "Einsatzbereich".
                (Shadows the builtin, but renaming it would break keyword
                callers.)
            data_file: CSV file name without directory and without ".csv".
            name: Optional output file name (without ".md").
            column_mapping: Optional dict mapping placeholder name -> CSV
                column name, for placeholders that differ from the column.
        """
        self.type = type
        self.data_file = data_file
        self.name = name
        self.column_mapping = column_mapping or {}  # Maps placeholder -> CSV column
        self.md_content = self.load_md_file(type)
        self.data_content = self.load_csv_file(data_file)
        self.content = self.md_content  # Until a row is rendered, content is the raw template

    def load_csv_file(self, data_file):
        """Read ``data/csv/<data_file>.csv`` into a DataFrame.

        Returns:
            The loaded DataFrame, or an empty DataFrame (after printing a
            message) when the file is missing or contains no data.
        """
        try:
            return pd.read_csv(f"data/csv/{data_file}.csv")
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # A zero-byte CSV is treated like a missing one instead of crashing.
            print("Data is needed")
            return pd.DataFrame()

    def load_md_file(self, type):
        """Read the markdown template registered for ``type``.

        Returns:
            The template text, or "" (after printing a message) when the type
            is unknown or the template file does not exist.
        """
        template_paths = {
            "Projekt": "data/template/Vorlage_Projekt.md",
            "Art": "data/template/Vorlage_Art.md",
            "Einsatzbereich": "data/template/Vorlage_Einsatzbereich.md"
        }

        if type not in template_paths:
            print("Type is needed - valid types: Projekt, Art, Einsatzbereich")
            return ""

        try:
            with open(template_paths[type], 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            print(f"Template file not found: {template_paths[type]}")
            return ""

    def create_md_file(self, name, row_index=0):
        """Render the template with one CSV row and remember it as ``self.content``.

        Nothing is written to disk here; call ``save_file`` afterwards.

        Args:
            name: Output file name (without ".md"); stored in ``self.name``.
            row_index: Position of the row (as for ``DataFrame.iloc``).
                Negative positions count from the end. A position outside the
                data falls back to the first row.

        Returns:
            The rendered markdown, or the current content unchanged when no
            CSV data is available.
        """
        self.name = name

        if self.data_content.empty:
            print("No CSV data available")
            return self.content

        row_count = len(self.data_content)
        # Check both ends: a too-negative position would otherwise raise
        # IndexError in iloc instead of using the documented fallback.
        if not -row_count <= row_index < row_count:
            print(f"Row index {row_index} out of range. Using first row.")
            row_index = 0

        row_data = self.data_content.iloc[row_index]
        self.content = self.populate_template(row_data)
        return self.content

    def populate_template(self, row_data):
        """Return the template with every placeholder replaced from ``row_data``.

        Custom mappings are applied first. Every remaining column is then
        tried under three placeholder spellings: the column name as is, in
        lowercase, and with spaces replaced by underscores.

        Args:
            row_data: A pandas Series (one CSV row) indexed by column name.
        """
        content = self.md_content

        # First, handle custom mappings
        for placeholder, csv_column in self.column_mapping.items():
            if csv_column in row_data:
                content = self._replace_placeholder_with_value(
                    content, f"{{{{{placeholder}}}}}", row_data[csv_column]
                )

        # Then, handle automatic column matching
        mapped_columns = list(self.column_mapping.values())
        for column, value in row_data.items():
            # Skip if this column was already handled by custom mapping
            if column in mapped_columns:
                continue

            # Column labels are not guaranteed to be strings (e.g. header-less
            # CSVs give integers), so normalise before calling str methods.
            column_name = str(column)
            variants = dict.fromkeys(
                (column_name, column_name.lower(), column_name.replace(" ", "_"))
            )  # ordered and de-duplicated
            for variant in variants:
                content = self._replace_placeholder_with_value(
                    content, f"{{{{{variant}}}}}", value
                )

        return content

    def _parse_value(self, value):
        """Normalise a cell value to either a string or a list.

        Missing values become "". Strings that look like ``[...]`` are parsed
        as Python list literals; if that fails, the text between the brackets
        is split on commas. Everything else is returned as a stripped string.
        """
        if isinstance(value, list):
            # Already a list; pd.isna() on a list returns an array whose
            # truth value is ambiguous.
            return value

        if pd.isna(value):
            return ""

        value_str = str(value).strip()

        # Check if it looks like a list string representation
        if value_str.startswith('[') and value_str.endswith(']'):
            try:
                parsed_value = ast.literal_eval(value_str)
            except (ValueError, SyntaxError, TypeError):
                # Not a valid literal (e.g. unquoted items): split on commas.
                inner = value_str[1:-1].strip()  # Remove brackets
                if inner:
                    items = [item.strip().strip("'\"") for item in inner.split(',')]
                    return [item for item in items if item]  # Remove empty items
            else:
                if isinstance(parsed_value, list):
                    return parsed_value

        return value_str

    def _replace_placeholder_with_value(self, content, placeholder, value):
        """Replace ``placeholder`` in ``content`` with ``value``.

        Scalar values replace the placeholder verbatim. List values are
        formatted according to the context the placeholder appears in:

        * ``- {{x}}``           -> one ``- #item`` bullet per item
          (the ``#`` prefix is presumably a tag marker - confirm)
        * ``1. {{x}}``/``1) {{x}}`` -> numbered list ``1. item``
        * ``{{x}}_list``        -> one ``- item`` bullet per item
        * ``{{x}}_comma``       -> ``item, item``
        * ``{{x}}_numbered``    -> numbered list
        * anything else         -> ``item, item``

        Every context present in the template is handled, so a placeholder
        used both as a bullet and inline is replaced in both places.
        """
        # Parse the value first to handle string representations of lists
        parsed_value = self._parse_value(value)

        if not isinstance(parsed_value, list):
            # For single values, normal replacement
            return content.replace(placeholder, str(parsed_value))

        items = [str(item) for item in parsed_value]
        hashtag_bullets = "\n".join(f"- #{item}" for item in items)
        plain_bullets = "\n".join(f"- {item}" for item in items)
        numbered_items = "\n".join(
            f"{number}. {item}" for number, item in enumerate(items, start=1)
        )
        comma_list = ", ".join(items)

        # Order matters: context-specific forms contain the bare placeholder,
        # so they must be consumed before the bare fallback at the end.
        replacements = (
            (f"- {placeholder}", hashtag_bullets),
            (f"1. {placeholder}", numbered_items),
            (f"1) {placeholder}", numbered_items),
            (f"{placeholder}_list", plain_bullets),
            (f"{placeholder}_comma", comma_list),
            (f"{placeholder}_numbered", numbered_items),
            (placeholder, comma_list),
        )
        for pattern, replacement in replacements:
            content = content.replace(pattern, replacement)
        return content

    def create_multiple_files(self):
        """Render and save one markdown file per CSV row.

        The file name is taken from the row's "Projektname" value (spaces and
        "/" become "_", other non-alphanumeric characters except "_" and "-"
        are dropped). Rows without a usable name get ``projekt_<position>``.
        If a name was already used in this run, ``_2``, ``_3``, ... is
        appended so that earlier files are not overwritten.

        Returns:
            The list of file names (without ".md") in row order, or None when
            no CSV data is available.
        """
        if self.data_content.empty:
            print("No CSV data available")
            return

        has_name_column = "Projektname" in self.data_content.columns
        created_files = []
        used_names = set()
        # Use the row *position*, not the index label from iterrows():
        # create_md_file looks rows up with iloc, and labels only coincide
        # with positions for a default RangeIndex.
        for position, (_, row) in enumerate(self.data_content.iterrows()):
            filename = ""
            if has_name_column and pd.notna(row["Projektname"]):
                raw_name = str(row["Projektname"]).replace(" ", "_").replace("/", "_")
                # Clean filename - remove special characters but keep underscores and hyphens
                filename = "".join(c for c in raw_name if c.isalnum() or c in "_-")

            # Fallback when the column is missing, the cell is empty, or
            # nothing survived the clean-up.
            if not filename:
                filename = f"projekt_{position}"

            unique_name = filename
            suffix = 2
            while unique_name in used_names:
                unique_name = f"{filename}_{suffix}"
                suffix += 1
            used_names.add(unique_name)

            self.create_md_file(unique_name, position)
            self.save_file()
            created_files.append(unique_name)

        print(f"Created {len(created_files)} files: {created_files}")
        return created_files

    def save_file(self):
        """Write ``self.content`` to ``data/<type>/<name>.md``.

        Prints a confirmation on success. Prints a message and returns
        without writing when no name is set; I/O errors are reported by
        printing rather than raised.
        """
        if not self.name:
            print("No name specified for file")
            return

        dir_path = f"data/{self.type}"

        try:
            # Create directory if it doesn't exist (inside the try so that a
            # failure here is reported the same way as a failed write).
            os.makedirs(dir_path, exist_ok=True)
            with open(f"{dir_path}/{self.name}.md", 'w', encoding='utf-8') as f:
                f.write(self.content)
            print(f"Saved {self.name}")
        except Exception as e:
            print(f"Error saving file: {e}")

In [68]:
# Single-file run: load the "Projekt" template and data/csv/Correlaid_Projekte.csv,
# render the row at position 2, and write the result to data/Projekt/test.md.
creator = MarkdownCreatorProjects("Projekt", "Correlaid_Projekte")
creator.create_md_file("test", row_index=2)  # only renders; nothing is written yet
creator.save_file()  # prints "Saved test" on success

Saved test


In [74]:
# Batch run: render and save one markdown file per CSV row into data/Projekt/.
# Files are named after each row's "Projektname" value; the call returns the
# list of file names, which the notebook displays as the cell output.
creator = MarkdownCreatorProjects("Projekt", "Correlaid_Projekte")
creator.create_multiple_files()

Saved Automatisierung_von_Reportings_aus_Evaluationsdaten
Saved Improving_and_upgrading_the_Silbernetz_dashboard
Saved Automatisiertes_Qualitätsmanagement_für_ein_Mentoringprogramm
Saved Data_Story_mit_Konstanzer_Klima-_und_Wetterdaten
Saved Automatisiertes_Monitoring_der_Zielgruppenentwicklung_im_Zeitverlauf
Saved Open_Data_aus_Bürger-_und_Jugendbeteiligungsprojekten
Saved Back-_und_Frontend_Contententwicklung_UX-Testing_und_rechtliche_Prüfung_mit_Umsetzung_der_entsprechenden_Änderungen_des_Webseiten-Relaunchs_von_Leerstandsmelderde
Saved Automatisierte_Fragebogenauswertung_mit_Generativer_KI
Saved Mithilfe_von_KI_Wirkungsmessung_skalieren
Saved Mit_Daten_zu_transparenterer_Demokratie
Saved R_Wrapper_for_the_Genesis_API
Saved ETL_Pipeline_von_Limesurvey_zu_einer_Datenbank
Saved Data_Flow_von_R_zu_Metabase_zu_Google_Docs_Ein_Live-Connected_Reporting-Workflow
Saved Wo_sollen_neue_Fahrradparkplätze_in_Paris_entstehen_Unterstützung_der_Entscheidungsfindung_mit_offenen_Daten
Saved Qualität

['Automatisierung_von_Reportings_aus_Evaluationsdaten',
 'Improving_and_upgrading_the_Silbernetz_dashboard',
 'Automatisiertes_Qualitätsmanagement_für_ein_Mentoringprogramm',
 'Data_Story_mit_Konstanzer_Klima-_und_Wetterdaten',
 'Automatisiertes_Monitoring_der_Zielgruppenentwicklung_im_Zeitverlauf',
 'Open_Data_aus_Bürger-_und_Jugendbeteiligungsprojekten',
 'Back-_und_Frontend_Contententwicklung_UX-Testing_und_rechtliche_Prüfung_mit_Umsetzung_der_entsprechenden_Änderungen_des_Webseiten-Relaunchs_von_Leerstandsmelderde',
 'Automatisierte_Fragebogenauswertung_mit_Generativer_KI',
 'Mithilfe_von_KI_Wirkungsmessung_skalieren',
 'Mit_Daten_zu_transparenterer_Demokratie',
 'R_Wrapper_for_the_Genesis_API',
 'ETL_Pipeline_von_Limesurvey_zu_einer_Datenbank',
 'Data_Flow_von_R_zu_Metabase_zu_Google_Docs_Ein_Live-Connected_Reporting-Workflow',
 'Wo_sollen_neue_Fahrradparkplätze_in_Paris_entstehen_Unterstützung_der_Entscheidungsfindung_mit_offenen_Daten',
 'Qualitätsanalyse_von_OpenStreetMap_Daten

In [None]:
# Example usage:

# Create single file from first CSV row (create_md_file only fills the
# template; save_file writes it to disk):
# creator.create_md_file("my_project")
# creator.save_file()

# Create single file from specific CSV row:
# creator.create_md_file("project_name", row_index=2)
# creator.save_file()

# Create files for all CSV rows:
# creator.create_multiple_files()

# Example CSV structure and markdown template:
# CSV columns: Name, Description, Budget, Status
# Template placeholders: {{Name}}, {{Description}}, {{Budget}}, {{Status}}