# Assignment 1. Working with directories and files in Python


---

## Task 1: Working with folders and files

### 1. Creating a project structure
Write a script that creates a project directory with subfolders:
- data
- results

In [None]:
import os
from pathlib import Path

# Lay out the project: one folder for input data, one for generated output
data_dir, results_dir = Path('data'), Path('results')
for folder in (data_dir, results_dir):
    # exist_ok=True keeps the cell re-runnable when the folder is already there
    folder.mkdir(exist_ok=True)
print(f"Created folders: {data_dir}, {results_dir}")

### 2. Working with CSV files
In the data folder, save the students.csv file with data about students (full name, group, grades).
Read data from students.csv and calculate the average score for each student.

In [4]:
import pandas as pd

# Load the student table stored in the data folder
students_csv_path = data_dir / 'students.csv'
students_df = pd.read_csv(students_csv_path)

# Columns whose values are averaged row by row
grade_columns = ["math", "physics", "cs", "history", "english"]
grades = students_df[grade_columns]
students_df['Average'] = grades.mean(axis="columns")

# Show only the name next to the computed mean
print(students_df[["full_name", "Average"]])

                full_name  Average
0        Shomanov Ayaulym     73.0
1         Sadykova Moldir     68.8
2          Sadykova Aliya     76.4
3        Imanbekov Yerlan     72.8
4    Nurmaganbet Temirlan     71.6
..                    ...      ...
145      Kaldybek Ayaulym     80.6
146         Beketov Damir     83.2
147         Kaldybek Madi     80.2
148      Zhussupov Gulnaz     75.0
149       Kaldybek Gulnaz     83.0

[150 rows x 2 columns]


### 3. Save results
Save aggregated results in JSON format (results/report.json).
Check: if the file already exists, display a warning and ask for confirmation before overwriting it.

In [None]:
import json

report_path = results_dir / 'report.json'

# Map each student's name to their average score.
# NOTE: dict keys are unique, so rows sharing the same full_name collapse
# into a single entry (the last row wins).
average_scores = dict(zip(students_df['full_name'], students_df['Average']))

# The task requires a warning and an explicit confirmation before an
# existing report is overwritten.
should_write = True
if report_path.exists():
    print(f"Warning: {report_path} already exists.")
    answer = input("Overwrite it? [y/N]: ")
    should_write = answer.strip().lower() in ("y", "yes")

if should_write:
    with open(report_path, 'w') as f:
        json.dump(average_scores, f, indent=4)
    print(f"Saved report to {report_path}")
else:
    print("Report was not overwritten.")



Saved report to results\report.json


### 4. Archiving
Automatically create an archive, results.zip, that contains the results folder.

In [7]:
import shutil

# Pack everything inside the results folder into results.zip
# (make_archive appends the ".zip" extension to base_name itself)
shutil.make_archive(base_name='results', format='zip', root_dir=results_dir)
print("Created archive: results.zip")

Created archive: results.zip


### 5. Working with pathlib
Implement a check to see if report.json exists. If yes, print its size and last modification date.

In [8]:
from datetime import datetime

# Report whether report.json is present and, if so, its size and
# last-modification time.
if report_path.exists():
    # Take a single stat() snapshot so that size and mtime describe the
    # same state of the file (and the filesystem is queried only once).
    report_stat = report_path.stat()
    size = report_stat.st_size
    mtime = datetime.fromtimestamp(report_stat.st_mtime)
    print(f"report.json exists. Size: {size} bytes. Last modified: {mtime}")
else:
    print("report.json does not exist.")

report.json exists. Size: 4083 bytes. Last modified: 2025-09-18 19:44:48.020844


## Task 2: Search for a word in a text file
Generate 5 text files (name_#.txt), put them in the same folder. The files contain any text. Write a program that accepts a search query and outputs the names of text files containing the desired substring.

In [None]:
# Five short passages; each one becomes the content of its own file
sample_texts = [
    "Mathematics is the language of the universe.",
    "Probability theory is a branch of mathematics.",
    "Artificial intelligence uses probability and statistics.",
    "Data science involves mathematics, statistics, and coding.",
    "Machine learning is a subset of artificial intelligence."
]

# All generated files are kept together in one folder
text_dir = Path('text_files')
text_dir.mkdir(exist_ok=True)

# Files are numbered from 1: file_1.txt ... file_5.txt
for number, passage in enumerate(sample_texts, start=1):
    target = text_dir / f"file_{number}.txt"
    target.write_text(passage)
    print(f"Created {target}")

In [None]:
# Search for a substring in all text files
def search_files(query, directory):
    """Return the names of the .txt files in *directory* that contain *query*.

    The match is a case-insensitive substring test against the whole file
    content. Only files directly inside *directory* are examined.

    Args:
        query: Substring to look for.
        directory: Folder to scan, given as a ``Path`` or a path string.

    Returns:
        A list of matching file names (without the folder part), sorted
        alphabetically so the result does not depend on filesystem order.
    """
    # Normalise the query once instead of once per file
    needle = query.lower()
    found_files = []
    # sorted() makes the otherwise arbitrary glob order deterministic
    for file in sorted(Path(directory).glob('*.txt')):
        with open(file, 'r') as f:
            content = f.read()
        if needle in content.lower():
            found_files.append(file.name)
    return found_files

# Ask the user what to look for, then scan the generated text files
search_query = input("Enter search query: ")
matching_files = search_files(search_query, text_dir)

# Handle the "nothing found" case first; otherwise list one name per line
if not matching_files:
    print("The search query is missing in these files.")
else:
    print("Files containing the search query:")
    print(*matching_files, sep="\n")

## Task 3: File Information
A file named file.txt is available. Write a program that outputs the following text statistics:
- the number of letters of the Latin alphabet
- number of words
- number of lines

In [None]:
import re

def count_text_stats(text):
    """Return ``(latin_letters, words, lines)`` for *text*.

    - latin_letters: number of characters in the ranges A-Z and a-z.
    - words: number of maximal runs of regex word characters, so digits
      and underscores count as part of a word as well.
    - lines: number of lines; a trailing newline terminates the last line
      rather than starting an extra empty one, and empty text has 0 lines.
    """
    latin_letters = len(re.findall(r'[A-Za-z]', text))
    words = len(re.findall(r'\b\w+\b', text))
    # Every '\n' ends one line; add one more only for a final line that
    # has content but no terminating newline.
    has_unterminated_tail = bool(text) and not text.endswith('\n')
    lines = text.count('\n') + (1 if has_unterminated_tail else 0)
    return latin_letters, words, lines


file_txt_path = Path('file.txt')

try:
    with open(file_txt_path, 'r') as f:
        text = f.read()
except FileNotFoundError:
    # Report the problem explicitly instead of ending with a traceback
    print(f"{file_txt_path} does not exist.")
else:
    latin_letters, words, lines = count_text_stats(text)

    print(f"Latin letters: {latin_letters}")
    print(f"Words: {words}")
    print(f"Lines: {lines}")

## Task 4: Replacing forbidden words
There are two files, words.txt and forbidden_words.txt. The file words.txt contains text. The text file forbidden_words.txt stores the forbidden words, separated by a space character. Write a program that replaces every forbidden word from forbidden_words.txt in the file words.txt with asterisks * (the number of asterisks equals the number of letters in the word). The program must replace all forbidden words wherever they occur, even in the middle of another word. The substitution is case-insensitive.