In [1]:
import pandas as pd
from tqdm import tqdm
import re

In [17]:
categories = {
    "Genres": [
        "Fantasy", "Science Fiction", "Mystery", "Romance", "Thriller", "Horror",
        "Historical Fiction", "Non-Fiction", "Biography", "Memoir", "Young Adult",
        "Children's", "Adventure", "Crime", "Dystopian", "Paranormal", "Comedy",
        "Drama", "Graphic Novel", "Poetry"
    ],
    "Themes": [
        "Coming of Age", "Friendship", "Love", "Revenge", "Survival", "Good vs. Evil",
        "Betrayal", "Redemption", "Courage", "Identity", "War", "Family", "Justice",
        "Freedom", "Power", "Loss", "Time Travel", "Forbidden Love", "Magic",
        "Moral Dilemma"
    ],
    "Settings": [
        "Medieval", "Futuristic", "Post-Apocalyptic", "Urban", "Rural", "Space",
        "Underwater", "Alternate Universe", "Steampunk", "Cyberpunk", "High Fantasy World",
        "Desert", "Arctic", "Ancient World", "Contemporary", "Dystopian Society",
        "Mythical Lands", "Virtual Reality", "Historical Era", "Alien Planet"
    ],
    "Tone/Mood": [
        "Dark", "Humorous", "Inspirational", "Suspenseful", "Emotional", "Uplifting",
        "Gritty", "Melancholic", "Optimistic", "Satirical", "Whimsical", "Tense",
        "Heartwarming", "Bittersweet", "Mysterious", "Dramatic", "Reflective",
        "Romantic", "Action-Packed", "Chill"
    ],
    "Audience": ["Adult", "Young Adult", "Middle Grade", "Children", "All Ages"],
    "Writing Style": [
        "First-Person Narrative", "Third-Person Narrative", "Stream of Consciousness",
        "Epistolary", "Non-Linear", "Multiple Perspectives", "Descriptive",
        "Dialogue-Driven", "Minimalist", "Lyrical", "Fast-Paced", "Slow-Burn",
        "Plot-Driven", "Character-Driven"
    ],
    "Features": [
        "Magic", "Superpowers", "Vampires", "Werewolves", "Dragons", "Aliens",
        "Robots", "Time Travel", "Mystery Solving", "Quest", "Political Intrigue",
        "Heists", "Detective Work", "Espionage", "Mythology", "Ghosts", "Zombies",
        "Pirates", "Supernatural Elements", "Technology"
    ],
    "Time Period": [
        "Prehistoric", "Ancient", "Medieval", "Renaissance", "18th Century",
        "19th Century", "Early 20th Century", "World War I", "World War II",
        "Cold War", "Contemporary", "Near Future", "Distant Future", "Timeless"
    ],
}

def parse_tags_from_text(text, categories):
    parsed_tags = {category: [] for category in categories.keys()}
    current_category = None

    for line in text.splitlines():
        for category in categories.keys():
            if re.match(rf"^###?\s*{category}", line, re.IGNORECASE):
                current_category = category
                break
        else:
            if current_category:
                tag = line.strip("-* ").strip()
                if tag:
                    parsed_tags[current_category].append(tag)

    return parsed_tags

def apply_parsing_to_dataset(df, categories):
    for category in categories.keys():
        df[category] = None

    for index, row in df.iterrows():
        parsed_tags = parse_tags_from_text(row['tags'], categories)
        for category, tags in parsed_tags.items():
            df.at[index, category] = ", ".join(tags)

    return df


In [15]:
file_path = "/content/with_tags_dataset.csv"  # Укажите путь к вашему файлу
data = pd.read_csv(file_path)
data

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,popularity,avgScore,tags
0,An inquiry into the nature and causes of the w...,The annual labour of every nation is the fund ...,['Adam Smith'],http://books.google.com/books/content?id=C5dNA...,http://books.google.com/books?id=C5dNAAAAcAAJ&...,London : Printed for W. Strahan and T. Cadell,1776-01-01,https://play.google.com/store/books/details?id...,['Capitalism'],598,4.073579,"Here are the tags that fit best for the book ""..."
1,The No. 1 Ladies' Detective Agency,Wayward daughters. Missing Husbands. Philander...,['Alexander McCall Smith'],http://books.google.com/books/content?id=CCSQP...,http://books.google.nl/books?id=CCSQPwAACAAJ&d...,Abacus Software,2008-01-01,http://books.google.nl/books?id=CCSQPwAACAAJ&d...,['Botswana'],509,4.322200,Here are the chosen tags:\n\n### Genres\n\n- M...
2,"Whitney, My Love",Let New York Times bestselling author Judith M...,['Judith McNaught'],http://books.google.com/books/content?id=WxsmD...,http://books.google.com/books?id=WxsmDQAAQBAJ&...,Simon and Schuster,2016-11-01,https://play.google.com/store/books/details?id...,['Fiction'],341,3.985337,Here are the chosen tags:\n\n### **Genres**\n\...
3,The Tipping Point: How Little Things Can Make ...,An introduction to the Tipping Point theory ex...,['Malcolm Gladwell'],http://books.google.com/books/content?id=w6V_P...,http://books.google.com/books?id=w6V_PwAACAAJ&...,"Wheeler Publishing, Incorporated",2003-01-01,http://books.google.com/books?id=w6V_PwAACAAJ&...,['Reference'],1295,4.128185,Here are the chosen tags:\n\n### Genres\n\n- N...
4,Women Who Make the World Worse : and How Their...,An analysis of how feminists may be compromisi...,"['""Kate OBeirne""']",http://books.google.com/books/content?id=e0EFA...,http://books.google.com/books?id=e0EFAQAAIAAJ&...,,2006-01-01,http://books.google.com/books?id=e0EFAQAAIAAJ&...,['Social Science'],358,3.740223,Here are the chosen tags:\n\n### Genres\n\n* N...
...,...,...,...,...,...,...,...,...,...,...,...,...
995,Picture of Dorian Grey (Classic Library),,['Oscar Wilde'],http://books.google.com/books/content?id=w9A98...,http://books.google.com/books?id=w9A98UIGNMAC&...,Bernhard Tauchnitz,1908-01-01,https://play.google.com/store/books/details?id...,,592,4.280405,### **Genres**\n\n- Classic\n- Fantasy\n- Horr...
996,Awakening: Kate Chopin Pb (Case Studies in Con...,,['New York Public Library Staff'],http://books.google.com/books/content?id=SsoiA...,http://books.google.nl/books?id=SsoiAQAAIAAJ&p...,Macmillan Reference USA,1999-08-01,http://books.google.nl/books?id=SsoiAQAAIAAJ&d...,,308,3.905844,Here are the tags that fit best:\n\n### Genres...
997,Chariots of the Gods? Unsolved Mysteries of th...,The author attempts to explain such perplexing...,['Erich von Diken'],http://books.google.com/books/content?id=m08EE...,http://books.google.com/books?id=m08EEAAAQBAJ&...,Berkley,1999-01-01,http://books.google.com/books?id=m08EEAAAQBAJ&...,['History'],301,3.813953,Here are the chosen tags:\n\n### Genres\n\n- N...
998,The Little Prince,,['Antoine de Saint-Exupéry' ' SBP Editors'],http://books.google.com/books/content?id=6948D...,http://books.google.com/books?id=6948DwAAQBAJ&...,Samaira Book Publishers,2017-11-04,https://play.google.com/store/books/details?id...,['Juvenile Fiction'],490,4.383673,### Genres\n\n- Classic\n- Fantasy\n- Young Ad...


In [18]:
updated_data = apply_parsing_to_dataset(data, categories)
updated_data.to_csv("parsed_tags_dataset.csv", index=False)
updated_data

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,popularity,avgScore,tags,Genres,Themes,Settings,Tone/Mood,Audience,Writing Style,Features,Time Period
0,An inquiry into the nature and causes of the w...,The annual labour of every nation is the fund ...,['Adam Smith'],http://books.google.com/books/content?id=C5dNA...,http://books.google.com/books?id=C5dNAAAAcAAJ&...,London : Printed for W. Strahan and T. Cadell,1776-01-01,https://play.google.com/store/books/details?id...,['Capitalism'],598,4.073579,"Here are the tags that fit best for the book ""...","Non-Fiction, Economics","Economy, Wealth, Labor, Consumption, Productio...",Historical Era (18th century),"Analytical, Instructive, Informative, Serious","Adult, Academic","Descriptive, Analytical, Informative, Stream o...","Economic concepts, Historical context",18th century
1,The No. 1 Ladies' Detective Agency,Wayward daughters. Missing Husbands. Philander...,['Alexander McCall Smith'],http://books.google.com/books/content?id=CCSQP...,http://books.google.nl/books?id=CCSQPwAACAAJ&d...,Abacus Software,2008-01-01,http://books.google.nl/books?id=CCSQPwAACAAJ&d...,['Botswana'],509,4.322200,Here are the chosen tags:\n\n### Genres\n\n- M...,"Mystery, Detective Fiction","Mystery Solving, Friendship, Family, Identity","Contemporary, Botswana","Humorous, Suspenseful, Heartwarming","Adult, All Ages","Third-Person Narrative, Descriptive, Dialogue-...","Detective Work, Mystery Solving, Quest",Contemporary
2,"Whitney, My Love",Let New York Times bestselling author Judith M...,['Judith McNaught'],http://books.google.com/books/content?id=WxsmD...,http://books.google.com/books?id=WxsmDQAAQBAJ&...,Simon and Schuster,2016-11-01,https://play.google.com/store/books/details?id...,['Fiction'],341,3.985337,Here are the chosen tags:\n\n### **Genres**\n\...,,,,,,,,
3,The Tipping Point: How Little Things Can Make ...,An introduction to the Tipping Point theory ex...,['Malcolm Gladwell'],http://books.google.com/books/content?id=w6V_P...,http://books.google.com/books?id=w6V_PwAACAAJ&...,"Wheeler Publishing, Incorporated",2003-01-01,http://books.google.com/books?id=w6V_PwAACAAJ&...,['Reference'],1295,4.128185,Here are the chosen tags:\n\n### Genres\n\n- N...,Non-Fiction,"Self-Improvement, Social Change, Ideas",Contemporary,"Inspirational, Thought-Provoking",Adult,"Informative, Descriptive, Analytical",,
4,Women Who Make the World Worse : and How Their...,An analysis of how feminists may be compromisi...,"['""Kate OBeirne""']",http://books.google.com/books/content?id=e0EFA...,http://books.google.com/books?id=e0EFAQAAIAAJ&...,,2006-01-01,http://books.google.com/books?id=e0EFAQAAIAAJ&...,['Social Science'],358,3.740223,Here are the chosen tags:\n\n### Genres\n\n* N...,"Non-Fiction, Memoir, Biography","Power, Identity, Betrayal, Redemption, Freedom...",Contemporary,"Suspenseful, Emotional, Dark",Adult,First-Person Narrative,,Contemporary
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Picture of Dorian Grey (Classic Library),,['Oscar Wilde'],http://books.google.com/books/content?id=w9A98...,http://books.google.com/books?id=w9A98UIGNMAC&...,Bernhard Tauchnitz,1908-01-01,https://play.google.com/store/books/details?id...,,592,4.280405,### **Genres**\n\n- Classic\n- Fantasy\n- Horr...,,,,,,,,
996,Awakening: Kate Chopin Pb (Case Studies in Con...,,['New York Public Library Staff'],http://books.google.com/books/content?id=SsoiA...,http://books.google.nl/books?id=SsoiAQAAIAAJ&p...,Macmillan Reference USA,1999-08-01,http://books.google.nl/books?id=SsoiAQAAIAAJ&d...,,308,3.905844,Here are the tags that fit best:\n\n### Genres...,"Non-Fiction, Biography, Memoir","Identity, Power, Justice, Loss",Contemporary,"Reflective, Emotional",Adult,"Non-Linear, Multiple Perspectives",,Contemporary
997,Chariots of the Gods? Unsolved Mysteries of th...,The author attempts to explain such perplexing...,['Erich von Diken'],http://books.google.com/books/content?id=m08EE...,http://books.google.com/books?id=m08EEAAAQBAJ&...,Berkley,1999-01-01,http://books.google.com/books?id=m08EEAAAQBAJ&...,['History'],301,3.813953,Here are the chosen tags:\n\n### Genres\n\n- N...,"Non-Fiction, History, Mystery","Discovery, Exploration, Investigation, Unexpla...","Historical Era, Ancient World, Archaeological ...","Informative, Curious, Investigative",Adult,"Descriptive, Informative","Historical Events, Archaeological Discoveries,...",Ancient
998,The Little Prince,,['Antoine de Saint-Exupéry' ' SBP Editors'],http://books.google.com/books/content?id=6948D...,http://books.google.com/books?id=6948DwAAQBAJ&...,Samaira Book Publishers,2017-11-04,https://play.google.com/store/books/details?id...,['Juvenile Fiction'],490,4.383673,### Genres\n\n- Classic\n- Fantasy\n- Young Ad...,"Classic, Fantasy, Young Adult, Children's, Adv...","Friendship, Love, Redemption, Courage, Identit...","Futuristic, Space, Desert, Alien Planet","Inspirational, Emotional, Uplifting, Melanchol...","Children, Young Adult, All Ages","Third-Person Narrative, Minimalist, Lyrical, C...",,Timeless
