## Import Libraries

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

## Choose Meta Llama Model

In [None]:
# Identifier of the chosen Meta Llama model (looks like a Hugging Face Hub
# repo id).
# NOTE(review): the loading code visible in this file reads the tokenizer and
# weights from a local directory path, not from this identifier — confirm
# whether model_id is still needed.
model_id = "meta-llama/Llama-3.1-8B"

## Set the Precision and Load the Tokenizer and Model from Backup

In [None]:
# Single source of truth for the local checkpoint directory, so the tokenizer
# and the model weights are always loaded from the same place.
MODEL_DIR = "/nobackup/ielhaime/models/"

# 4-bit NF4 quantization with double quantization enabled; the compute dtype
# is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# device_map="auto" leaves device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    quantization_config=bnb_config,
    device_map="auto",
)


## Set Up the Text Gen Pipeline and Set Size of Response

In [None]:
# Build the generation pipeline around the already-loaded model and tokenizer.
# The task identifier is hyphenated ("text-generation"); the underscore
# spelling is not a registered task name and makes pipeline() fail.
text_generator = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    # Upper bound on the number of newly generated tokens per call.
    max_new_tokens=1024,
)

## Summary Function

In [None]:
def get_summary(document):
    """Run ``document`` through the pipeline and return the generated text.

    Only the ``'generated_text'`` field of the first result is returned.
    """
    outputs = text_generator(document)
    first_result = outputs[0]
    return first_result['generated_text']

## Get Summaries

In [None]:
import os
import csv
from pathlib import Path

def get_summary(document):
    """Feed ``document`` to the text generator and return its first output.

    The pipeline result is indexed at position 0 and the value stored under
    the ``'generated_text'`` key is handed back to the caller.
    """
    return text_generator(document)[0]['generated_text']

def read_text_file(file_path):
    """Return the text stored in ``file_path``.

    The file is decoded as UTF-8 first. If that decoding fails, the file is
    read again as Latin-1. Any other error raised by the UTF-8 attempt is
    printed and ``None`` is returned instead of the text.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: fall through to the Latin-1 attempt below.
        pass
    except Exception as err:
        print(f"Error reading file {file_path}: {err!s}")
        return None

    # Second attempt with a different encoding; errors raised here are not
    # intercepted and propagate to the caller.
    with open(file_path, 'r', encoding='latin-1') as handle:
        return handle.read()

def process_directory(directory_path='content', output_file='summaries.csv'):
    """
    Process all text files in the specified directory and save summaries to CSV.

    The directory is created (including any missing parent directories) when
    it does not exist. Files whose names end in ``.txt`` are handled in sorted
    name order so the CSV rows are reproducible between runs. A file that
    cannot be read or summarized is reported and skipped; the CSV is written
    only when at least one summary was produced.

    Args:
        directory_path (str): Path to the directory containing text files
        output_file (str): Path of the output CSV file; it gets the columns
            ``file_name`` and ``summary``

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # parents=True so that a nested path such as "data/content" does not raise
    # FileNotFoundError when intermediate directories are missing.
    Path(directory_path).mkdir(parents=True, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort for stable output.
    txt_files = sorted(
        name for name in os.listdir(directory_path) if name.endswith('.txt')
    )
    if not txt_files:
        print(f"No text files found in {directory_path}")
        return

    summaries = []
    for file_name in txt_files:
        file_path = os.path.join(directory_path, file_name)
        print(f"Processing {file_name}...")

        content = read_text_file(file_path)
        if content is None:
            # Nothing to summarize for this file; move on to the next one.
            continue

        try:
            summary = get_summary(content)
        except Exception as e:
            # Best-effort batch: one failing document must not abort the rest.
            print(f"Error processing {file_name}: {str(e)}")
            continue
        summaries.append({'file_name': file_name, 'summary': summary})

    if not summaries:
        print("No summaries were generated")
        return

    try:
        # newline='' lets the csv module control line endings itself.
        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['file_name', 'summary'])
            writer.writeheader()
            writer.writerows(summaries)
    except Exception as e:
        print(f"Error writing to CSV: {str(e)}")
    else:
        print(f"Summaries saved to {output_file}")

# Run the batch job with process_directory()'s default arguments when this
# code is executed as the main module.
if __name__ == "__main__":
    process_directory()