#### Q1. Can you create a Python program that reads a text file and counts the number of words contained within it? The program should be designed to read the file and break it down into individual words, using spaces, punctuation marks, and other delimiters to separate the words. It should then count the number of words found in the file and display this count as output. The program should be flexible enough to work with different text files and should be able to handle a variety of formatting and punctuation styles.

In [16]:
import string

def count_words(file_path):
    # Define punctuation marks
    punctuation = string.punctuation

    # Initialize word count
    word_count = 0

    try:
        # Open the file in read mode
        with open(file_path, 'r') as file:
            # Read each line in the file
            for line in file:
                # Remove punctuation marks and split the line into words
                translation_table = str.maketrans('', '', punctuation)
                words = line.translate(translation_table).split()
                # Increment word count by the number of words in the line
                word_count += len(words)
    except FileNotFoundError:
        print("File not found!")
    except Exception as e:
        print("An error occurred:", e)

    return word_count

# Example usage
file_path = 'textfile.txt'  
total_words = count_words(file_path)
print("Total words in the file:", total_words)


Total words in the file: 38


### Q2. Could you help me create a Python program that can find and display the longest word in a text file? The program should be able to read any text file and separate its contents into individual words, taking into account various delimiters like spaces, punctuation marks, and other characters. Then, it should compare the length of each word and determine which one is the longest, and finally, print that word as output. The program should be versatile enough to work with various text files and be able to handle different formatting styles and punctuation.

In [15]:
import string

def find_longest_word(file_path):
    # Define punctuation marks
    punctuation = string.punctuation

    # Initialize variables
    longest_word = ''
    max_length = 0

    try:
        # Open the file in read mode
        with open(file_path, 'r') as file:
            # Read each line in the file
            for line in file:
                # Remove punctuation marks and split the line into words
                translation_table = str.maketrans('', '', punctuation)
                words = line.translate(translation_table).split()
                # Iterate through each word
                for word in words:
                    # Check if the length of the current word is greater than the maximum length found so far
                    if len(word) > max_length:
                        max_length = len(word)
                        longest_word = word
    except FileNotFoundError:
        print("File not found!")
    except Exception as e:
        print("An error occurred:", e)

    return longest_word

# Example usage
file_path = 'textfile.txt'  # Change this to the path of your text file
longest_word = find_longest_word(file_path)
print("Longest word in the file:", longest_word)


Longest word in the file: punctuation


### Q3. Write a Python program to read a text file and print out the most frequent word(s) in the file.

In [22]:
import string
from collections import Counter

def most_frequent_words(file_path):
    # Define punctuation marks
    punctuation = string.punctuation

    # Initialize a Counter object to count the occurrences of each word
    word_counter = Counter()

    try:
        # Open the file in read mode
        with open(file_path, 'r') as file:
            # Read each line in the file
            for line in file:
                # Remove punctuation marks and split the line into words
                translation_table = str.maketrans('', '', punctuation)
                words = line.translate(translation_table).split()
                # Update word_counter with the words in the line
                word_counter.update(words)
    except FileNotFoundError:
        print("File not found!")
        return
    except Exception as e:
        print("An error occurred:", e)
        return

    # Get the most common word(s) and their frequencies
    most_common = word_counter.most_common()
    # Find the maximum frequency
    max_frequency = most_common[0][1]
    # Filter words with maximum frequency
    most_frequent_words = [word for word, freq in most_common if freq == max_frequency]

    return most_frequent_words, max_frequency

# Example usage
file_path = 'textfile.txt'  # Change this to the path of your text file
result = most_frequent_words(file_path)
if result:
    most_frequent_words, max_frequency = result
    print("Most frequent word(s) in the file:", most_frequent_words)
    print("Frequency:", max_frequency)


Most frequent word(s) in the file: ['is', 'file', 'punctuation', 'the']
Frequency: 2


In [21]:
from collections import Counter
# Sample text
text = "apple banana apple orange banana grape orange carrot shan apple banana"
# Initialize a Counter object to count the occurrences of each word
word_counter = Counter()
# Split the text into words
words = text.split()
# Update word_counter with the words
word_counter.update(words)
# Print the contents of word_counter
# Get the most common word(s) and their frequencies
most_common = word_counter.most_common()
# Find the maximum frequency
max_frequency = most_common[0][1]
# Filter words with maximum frequency
most_frequent_words = [word for word, freq in most_common if freq == max_frequency]
print(most_frequent_words)
print(most_common)

['apple', 'banana']
[('apple', 3), ('banana', 3), ('orange', 2), ('grape', 1), ('carrot', 1), ('shan', 1)]


In [28]:
# Sample most_common list with more elements
most_common = [('apple', 5), ('banana', 3), ('orange', 2), ('grape', 2), ('kiwi', 1)]

# Extract the frequency of the most common word
max_frequency = most_common[0][1]

print("Frequency of the most common word:", max_frequency)


Frequency of the most common word: 5


### Q4. How can you use Python to count the number of rows in a CSV file?

In [31]:
import csv

def count_csv_rows(file_path):
    try:
        # Open the CSV file in read mode
        with open(file_path, 'r', newline='') as file:
            # Create a CSV reader object
            csv_reader = csv.reader(file)
            
            # Use the len() function to count the rows
            row_count = len(list(csv_reader))
            
            return row_count
    except FileNotFoundError:
        print("File not found!")
    except Exception as e:
        print("An error occurred:", e)

# Example usage
file_path = 'example.csv'  # Change this to the path of your CSV file
row_count = count_csv_rows(file_path)
print("Number of rows in the CSV file:", row_count)


Number of rows in the CSV file: 5


### Q5. How can you use Python to calculate the average of a specific column in a CSV file?

In [42]:
import csv

def calculate_column_average(file_path, column_index):
    try:
        # Open the CSV file in read mode
        with open(file_path, 'r', newline='') as file:
            # Create a CSV reader object
            csv_reader = csv.reader(file)
            
            # Initialize variables
            total = 0
            count = 0
            
            # Iterate over each row in the CSV file
            for row in csv_reader:
                # Skip the header row
                if csv_reader.line_num == 1:
                    continue
                
                # Try to convert the column value to a float and add it to the total
                try:
                    value = float(row[column_index])
                    total += value
                    count += 1
                except ValueError:
                    # If conversion fails, skip the row
                    continue
            
            # Calculate the average
            if count > 0:
                average = total / count
                return average
            else:
                return None
    except FileNotFoundError:
        print("File not found!")
    except IndexError:
        print("Invalid column index!")
    except Exception as e:
        print("An error occurred:", e)

# Example usage
file_path = 'example.csv'  # Change this to the path of your CSV file
column_index = 2  # Change this to the index of the column you want to calculate the average for
average = calculate_column_average(file_path, column_index)
if average is not None:
    print(f"Average of column {column_index}: {average}")
else:
    print("No data found or column is empty.")


Invalid column index!
No data found or column is empty.


In [47]:
import pandas as pd
df1 = pd.read_csv('example.csv')
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        15 non-null     object
 1   Age         15 non-null     int64 
 2   Salary      15 non-null     int64 
 3   Department  15 non-null     object
 4   Location    15 non-null     object
 5   Education   15 non-null     object
 6   Experience  15 non-null     int64 
 7   Position    15 non-null     object
 8   Email       15 non-null     object
 9   Phone       15 non-null     object
dtypes: int64(3), object(7)
memory usage: 1.3+ KB
