In [None]:
"""
Welcome to our City Analysis project. 
This project aims to provide insightful information about cities from the dataset we have. 
Our goal is to process, analyze, and offer unique perspectives on city data, 
which is valuable to a wide range of audiences, including individuals looking for 
better places to live, governments comparing their cities, and businesses seeking potential opportunities.

In the course of this project, we will perform various operations, 
such as parsing CSV data, manipulating lists, and writing to files, all in the Python programming language.


The main functionalities we are providing include:

Reading and Processing the Dataset: This involves reading our data from a CSV file, 
parsing it into a readable format and then processing it for further use. 
This is a necessary step to convert raw data into a more structured and usable format.

Finding Similar Cities: Given a city, this function will find and list cities that are 
'better' based on certain parameters. We define 'better' as 
having lower values for the AQI Value, Ozone AQI Value, and PM2.5 AQI Value columns in our dataset.

Finding the Average and Identifying Top Performers: We will calculate the average of 
certain columns and identify the cities that perform better than average. 
These cities are then written to a file for further use.

Finding Cities Starting with a Specific Letter: Here, we take a specific letter as an input, 
and find all cities that start with this letter. Additionally, we also find the cities from this 
list that perform better than the average.

The ultimate goal of this project is to perform an exploratory data analysis, 
which allows us to understand more about the cities based on the data we have. 
Our analyses could potentially help identify the best cities based on certain parameters, 
hence, assisting the decision-making processes.

By the end of this project, expect to have a CSV file named 'my_file.csv' 
that will include all the cities that are 'better' based on various criteria we defined above.
"""

In [8]:
# Define a function to read data from a CSV file
def read_dataset(filename):
    """Read a CSV file into a list of rows, converting whole-number cells to float.

    The first row is returned untouched, as are the first two columns of
    every row. In the remaining cells, text made up solely of decimal digits
    (e.g. '41') becomes a float; everything else -- category labels, and
    signed or fractional text such as '-2.3667' -- stays a string.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of cells. Blank lines are skipped.
    """
    # Imported locally so this cell stays self-contained.
    import csv

    with open(filename, 'r') as file:
        # csv.reader keeps a quoted field that contains commas in one cell,
        # which a plain split(',') would break apart and misalign columns.
        # Lines are stripped first (as before) to drop the newline and any
        # surrounding whitespace; blank lines parse to [] and are dropped.
        data = [row for row in csv.reader(line.strip() for line in file) if row]

    # Convert numeric cells in every row after the first one
    for row in data[1:]:
        # The first two columns are left as text
        for j in range(2, len(row)):
            # isdecimal() only accepts characters float() can parse, whereas
            # isdigit() also accepts e.g. superscript digits and would crash
            if row[j].isdecimal():
                row[j] = float(row[j])
    # Return the processed data
    return data

# Load 'Countries.csv' into the module-level `dataset` that the functions below read.
# The printed length counts every row returned by read_dataset, including the first
# row (presumably the CSV header, since the functions below skip it via dataset[1:]).
dataset = read_dataset('Countries.csv')
print(len(dataset))


16696


In [4]:
# Define a function to find cities that are 'better' than the given city
def find_similar_cities(city_name, data=None, output_path='my_file.csv'):
    """Append and print every city that is 'better' than the given city.

    A row is 'better' when its values in columns 2, 6 and 10 are all strictly
    lower than those of the reference city. Matches are sorted by column 2,
    appended to the output file one comma-joined row per line, and printed.

    Args:
        city_name: Name of the reference city; matched case-insensitively
            against column 1. The first matching row is used.
        data: Rows to search, with the first row skipped. Defaults to the
            module-level `dataset`.
        output_path: File the matching rows are appended to.

    Returns:
        The string 'City is not found.' when no row matches `city_name`,
        otherwise None.
    """
    rows = dataset if data is None else data
    # Skip the first row for the lookup as well: matching it would make the
    # numeric comparisons below fail on its text cells.
    records = rows[1:]

    wanted = city_name.lower()
    reference = next((row for row in records if row[1].lower() == wanted), None)
    # If no city is found, return a corresponding message
    if reference is None:
        return 'City is not found.'

    # Keep the rows that are strictly lower in all three compared columns
    better_cities = [
        row for row in records
        if row[2] < reference[2] and row[6] < reference[6] and row[10] < reference[10]
    ]

    if not better_cities:
        # Print a message if no better city is found
        print('It seems there is no city better than yours!')
        return None

    # Sort the better cities by the third column (index 2)
    better_cities.sort(key=lambda row: row[2])

    # Append to the file so results from earlier calls are kept
    with open(output_path, 'a') as file:
        for row in better_cities:
            line = ','.join(map(str, row)) + '\n'
            # Write the city data to the file and print it
            file.write(line)
            print(line)
    return None

# Call the function with 'moscow' as an argument (the city lookup lower-cases both sides)
find_similar_cities('moscow')

Ecuador,Macas,7.0,Good,1.0,Good,7.0,Good,0.0,Good,7.0,Good,-2.3667,-78.1333

Ecuador,Azogues,8.0,Good,0.0,Good,7.0,Good,0.0,Good,8.0,Good,-2.7333,-78.8333

Papua New Guinea,Tari,8.0,Good,0.0,Good,8.0,Good,0.0,Good,7.0,Good,-5.8489,142.9506

Peru,Huaraz,9.0,Good,0.0,Good,9.0,Good,0.0,Good,8.0,Good,-9.5333,-77.5333

Ecuador,Nueva Loja,10.0,Good,1.0,Good,5.0,Good,0.0,Good,10.0,Good,0.0847,-76.8828

Peru,Huancavelica,10.0,Good,0.0,Good,2.0,Good,1.0,Good,10.0,Good,-12.7864,-74.9756

Indonesia,Manokwari,10.0,Good,0.0,Good,10.0,Good,0.0,Good,10.0,Good,-0.8667,134.0833

Peru,Puquio,11.0,Good,0.0,Good,11.0,Good,0.0,Good,5.0,Good,-14.6939,-74.1241

Argentina,Comodoro Rivadavia,11.0,Good,0.0,Good,11.0,Good,1.0,Good,4.0,Good,-45.8647,-67.4808

Papua New Guinea,Mendi,11.0,Good,0.0,Good,11.0,Good,0.0,Good,5.0,Good,-6.1478,143.6572

Argentina,Puerto Madryn,11.0,Good,0.0,Good,11.0,Good,0.0,Good,7.0,Good,-42.7667,-65.05

Brazil,Andradina,11.0,Good,1.0,Good,11.0,Good,0.0,Good,11.0,Good,-20.8958,-51.3789

In [15]:
# Define a function to find the average of certain columns and write to file
def find_average_and_write(data=None, output_path='my_file.csv'):
    """Append and print every row that beats the averages of columns 2, 6 and 10.

    The average of each of the three columns is taken over all rows after the
    first one. A row qualifies when it is strictly below the average in all
    three columns. Qualifying rows are appended to the output file, one
    comma-joined row per line, and printed.

    Args:
        data: Rows to analyse, with the first row skipped. Defaults to the
            module-level `dataset`.
        output_path: File the qualifying rows are appended to.

    Returns:
        None. Nothing is written or printed when there are no rows to
        average or when no row qualifies.
    """
    rows = dataset if data is None else data
    # Slice once; the first row is not part of the numeric data
    records = rows[1:]
    # Without data rows there is nothing to average (avoids dividing by zero)
    if not records:
        return

    # Columns compared against their averages (indices 2, 6 and 10)
    columns = (2, 6, 10)
    average_values = [
        sum(row[col] for row in records) / len(records) for col in columns
    ]

    # Keep the rows that are strictly below the average in every compared column
    better_than_average = [
        row for row in records
        if all(row[col] < avg for col, avg in zip(columns, average_values))
    ]

    if better_than_average:
        # Append to the file so results from earlier calls are kept
        with open(output_path, 'a') as file:
            for row in better_than_average:
                line = ','.join(map(str, row)) + '\n'
                # Write the row to the file and print it
                file.write(line)
                print(line)

# Call the function: rows below the dataset-wide averages are appended to 'my_file.csv' and printed
find_average_and_write()

Brazil,Presidente Dutra,41.0,Good,1.0,Good,5.0,Good,1.0,Good,41.0,Good,-5.29,-44.49

Brazil,Presidente Dutra,41.0,Good,1.0,Good,5.0,Good,1.0,Good,41.0,Good,-11.2958,-41.9869

United States of America,Punta Gorda,54.0,Moderate,1.0,Good,14.0,Good,11.0,Good,54.0,Moderate,16.1005,-88.8074

United States of America,Punta Gorda,54.0,Moderate,1.0,Good,14.0,Good,11.0,Good,54.0,Moderate,26.8941,-82.0513

Netherlands,Raalte,41.0,Good,1.0,Good,24.0,Good,6.0,Good,41.0,Good,52.3833,6.2667

France,Raismes,59.0,Moderate,1.0,Good,30.0,Good,4.0,Good,59.0,Moderate,50.3892,3.4858

Poland,Piaseczno,28.0,Good,1.0,Good,28.0,Good,2.0,Good,28.0,Good,52.0667,21.0167

Russian Federation,Polevskoy,31.0,Good,1.0,Good,31.0,Good,0.0,Good,17.0,Good,56.45,60.1833

Indonesia,Pontianak,44.0,Good,1.0,Good,15.0,Good,0.0,Good,44.0,Good,-0.0206,109.3414

Brazil,Porecatu,30.0,Good,1.0,Good,9.0,Good,2.0,Good,30.0,Good,-22.7558,-51.3789

Finland,Pori,30.0,Good,1.0,Good,30.0,Good,1.0,Good,15.0,Good,61.4833,21.8

United States 

In [10]:
# Define a function to find cities starting with a given letter
def find_cities_start_with(letter, data=None, output_path='my_file.csv'):
    """Append and print the better-than-average cities among those starting with `letter`.

    Rows whose column 1 starts with `letter` (case-insensitive) are selected.
    The averages of columns 2, 6 and 10 are taken over that selection only,
    and the selected rows that are strictly below all three averages are
    appended to the output file, one comma-joined row per line, and printed.

    Args:
        letter: Prefix the city name must start with.
        data: Rows to search, with the first row skipped. Defaults to the
            module-level `dataset`.
        output_path: File the qualifying rows are appended to.

    Returns:
        None. A message is printed when no city starts with `letter`.
    """
    rows = dataset if data is None else data
    prefix = letter.lower()
    # Skip the first row: if its second cell matched the prefix, its text
    # cells would end up in the sums below and raise a TypeError.
    cities_start_with_letter = [
        row for row in rows[1:] if row[1].lower().startswith(prefix)
    ]

    if not cities_start_with_letter:
        # Print a message if no cities start with the given letter
        print('It seems there are no such cities')
        return

    # Columns compared against their averages (indices 2, 6 and 10)
    columns = (2, 6, 10)
    count = len(cities_start_with_letter)
    average_values = [
        sum(row[col] for row in cities_start_with_letter) / count
        for col in columns
    ]

    # Keep the rows that are strictly below the average in every compared column
    better_cities = [
        row for row in cities_start_with_letter
        if all(row[col] < avg for col, avg in zip(columns, average_values))
    ]

    if better_cities:
        # Append to the file so results from earlier calls are kept
        with open(output_path, 'a') as file:
            for row in better_cities:
                line = ','.join(map(str, row)) + '\n'
                # Write the row to the file and print it
                file.write(line)
                print(line)

# Call the function with 'a' as an argument (the prefix match is case-insensitive)
find_cities_start_with('a')

Belgium,Alken,35.0,Good,1.0,Good,25.0,Good,3.0,Good,35.0,Good,50.8761,5.3078

Canada,Alma,55.0,Moderate,1.0,Good,27.0,Good,0.0,Good,55.0,Moderate,48.55,-71.65

Canada,Alma,55.0,Moderate,1.0,Good,27.0,Good,0.0,Good,55.0,Moderate,43.3799,-84.6556

Papua New Guinea,Alotau,28.0,Good,0.0,Good,28.0,Good,0.0,Good,13.0,Good,-10.3167,150.4333

Madagascar,Ambovombe,26.0,Good,0.0,Good,26.0,Good,0.0,Good,20.0,Good,-25.1764,46.0833

Madagascar,Antalaha,22.0,Good,0.0,Good,22.0,Good,0.0,Good,20.0,Good,-14.8833,50.25

Russian Federation,Arkadak,28.0,Good,1.0,Good,28.0,Good,1.0,Good,15.0,Good,51.9333,43.5

Russian Federation,Arsk,28.0,Good,1.0,Good,28.0,Good,1.0,Good,19.0,Good,56.1,49.8833

Armenia,Ashtarak,39.0,Good,1.0,Good,29.0,Good,1.0,Good,39.0,Good,40.2975,44.3617

United Kingdom of Great Britain and Northern Ireland,Ayr,29.0,Good,1.0,Good,29.0,Good,2.0,Good,20.0,Good,55.458,-4.629

United Kingdom of Great Britain and Northern Ireland,Ayr,29.0,Good,1.0,Good,29.0,Good,2.0,Good,20.0,Good,-19.5744,1