In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
import numpy as np
import re


# Initialize BART model and tokenizer.
# These are module-level names: summarize_text() below reads `tokenizer` and
# `model` directly rather than receiving them as arguments.
model_name = 'facebook/bart-large-cnn'
# Both objects are loaded from the same checkpoint name so they stay in sync.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

def preprocess_text(text):
    """Normalize *text* before it is split into sections.

    Punctuation is removed, every run of whitespace (spaces, tabs, newlines)
    is collapsed to a single space, and the result is lowercased.

    Args:
        text: The raw input string.

    Returns:
        The normalized string. Leading/trailing whitespace is collapsed to at
        most one space but is not stripped.
    """
    # Strip punctuation first: removing a free-standing mark such as the "-"
    # in "a - b" leaves two adjacent spaces, so whitespace must be collapsed
    # afterwards for the "single space" guarantee to actually hold.
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text)  # Collapse whitespace runs to one space
    return text.lower()

def summarize_text(text, max_length=1000, min_length=40):
    """Summarize *text* using the module-level BART ``tokenizer`` and ``model``.

    Args:
        text: The text to summarize. The tokenized input is truncated to
            1024 tokens.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        min_length: Forwarded to ``model.generate`` as ``min_length``.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when
        *text* is empty or contains only whitespace.
    """
    # With nothing to summarize, generation would still be forced to emit at
    # least `min_length` tokens of invented text; return an empty summary.
    if not text or not text.strip():
        return ""

    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the tokenizer's mask explicitly instead of letting generate()
        # infer it from the input ids.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
    )
    # generate() returns a batch; only one sequence was submitted.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def split_into_sections(text, section_length=1000, overlap=200):
    """Split *text* into overlapping, fixed-size character windows.

    The text is first normalized with ``preprocess_text``. Consecutive
    sections start ``section_length - overlap`` characters apart, so each
    section repeats the last ``overlap`` characters of the previous one.

    Args:
        text: The raw input string.
        section_length: Maximum number of characters per section (>= 1).
        overlap: Number of characters shared by neighbouring sections; must
            satisfy ``0 <= overlap < section_length``.

    Returns:
        A list of section strings; empty when the normalized text is empty.

    Raises:
        ValueError: If ``section_length`` or ``overlap`` is out of range.
    """
    if section_length < 1:
        raise ValueError("section_length must be at least 1")
    # overlap >= section_length would never advance the window (endless
    # loop); a negative overlap would silently skip text between sections.
    if not 0 <= overlap < section_length:
        raise ValueError("overlap must satisfy 0 <= overlap < section_length")

    text = preprocess_text(text)
    total = len(text)
    step = section_length - overlap

    sections = []
    for start in range(0, total, step):
        end = start + section_length
        sections.append(text[start:end])
        # Once a window reaches the end of the text, any further window would
        # only repeat a suffix of this one, so stop here.
        if end >= total:
            break
    return sections

def summarize_sections(sections):
    """Return one summary per section, in the same order as *sections*.

    Each section is passed to ``summarize_text`` with its default length
    settings.
    """
    section_summaries = []
    for chunk in sections:
        section_summaries.append(summarize_text(chunk))
    return section_summaries

def combine_summaries(summaries):
    """Join the section summaries into one string.

    Summaries are concatenated in order, separated by a blank line.
    """
    paragraph_break = "\n\n"
    return paragraph_break.join(summaries)

def main(text):
    """Run the whole pipeline on *text* and return the combined summary.

    The text is split into overlapping sections, each section is summarized,
    and the per-section summaries are joined into a single string.
    """
    return combine_summaries(summarize_sections(split_into_sections(text)))

text = """Hello all, my name is Krushnayak and welcome to my YouTube channel. So guys before the start of every new year, I usually plan that what all skill sets I really need to add in my bucket list so that I'll be able to teach you all. I will be also able to show you that how specifically it is used in industries. In 2023, you know, I had targeted various MLops platform and if you probably see my videos in my YouTube channel, I've covered a lot of end to end projects. I have covered a lot many MLops platform tools, you know, and I've shown each and everything, which was really beneficial for many people out there who really want to make successful career transition into the data science industry. And MLops, I think in 2024 also it will play a very important role since right now LLM applications are basically coming up, you know, generative AI is there. So many tools in generative AI have actually come frameworks, libraries and many more things. So MLops will still be there in 2024. But my main aim in 2023 was to understand all about different MLops platform and try to create videos, try to upgrade myself with respect to all those skill sets and provide those videos to you. This was very beneficial. Many people were able to make successful career transition. Now in 2024, I'm also going to focus. See in 2023 also from past three to four months, I'm really much focused in generative AI solving amazing use cases, trying to see multiple frameworks, trying to see multiple LLM models, how you can fine tune it, how you can probably use it in business use cases. And based on that, I'm also creating projects. But majorly in 2024, right, the weightage between MLops and generative AI, I will try to put my most of the weightage in understanding different, different frameworks, cloud platforms, techniques in business use cases in the field of generative AI. 
So in this video, like if you are also interested and obviously you can see the growth in generative area, how it is happening every other day, some new things are actually coming, right? Let it be models, let it be LLM models, the recent launch of Google Gemini. I know that Gemini did not provide you the right kind of demo video. But other than that, you have Mistral 7B, you have OpenAI, GPD for turbo, right? All these models are specifically coming. Now people are using this in creating amazing AI applications. And you can see there are a lot of startups from image to text, text to image. There are so many different things. I was just seeing one AI platform today, you know, it is basically called as Pickalabs. You just give a prompt and it will try to create an amazing video for you, right? And just imagine you don't even require a video editing any person over there, right? And this is super easy, right? When you probably see this kind of application. So 2024, I'm going to completely focus most of my, if I say 100% from 100%, I'm going to focus 60 to 70% in generative AI and 30% with respect to MLOps platform. And don't worry, every videos that I probably learn will be coming in the form of YouTube channel. But in this video, if you are also really interested to understand about generative AI, I've created an amazing roadmap to learn generative AI in 2024. And this will be specifically for two kinds of roles, which I'm actually going to discuss. And this roadmap, trust me, it has some prerequisites. I'm also going to provide you some videos over here, because if you just cover those prerequisites, that will be more than sufficient to start with generative AI. Yeah. And this is for all those people who are really interested to work in the data field, right? Data analytics field. If you're a developer, if you probably want to work into the generative AI, I think you don't even require this prerequisite. 
And I'll also call out that specific information over here in my GitHub. Once I probably show you the specific roadmap. So let me go ahead and let me share my screen. So this is the roadmap to learn generative AI. So here you can probably see everything is there. I've provided videos link, I have everything, the prerequisites that is actually required. Everything is mentioned over here. And please make sure that you fork this repository, keep a star on it, because 2024, I am going to really create a lot many things over here. It'll be quite amazing, right? Many, many things will be coming with respect to projects, with respect to frameworks, with respect to what are things I'm going to specifically cover in Lanch in everything, right? So let's start and let's understand this particular roadmap. Before you go ahead, please make sure that you hit like for this particular video will target at 2000 likes at least, you know, because this will be super beneficial because here you're going to get all the videos, all the materials, everything in front of you with respect to the prerequisites. Now I will talk about two different profiles over here. One profile is that if you want to get into the data analytics industry, and let's say you want to probably start if you're starting from scratch, okay, starting from scratch, or from past two years, you're also already in the data analytics field, you're learning about data science, what kind of upgradation you can basically do. The second type of people will be core developers, right? So in my company also, we have used generative AI, we are creating support systems, we are creating assignment, Q&A systems, many more things are there. Right now those developers are specifically creating and they don't have need to have all the knowledge with respect to some kind of prerequisites as such directly they can start with generative AI. So I will be talking about both of them step by step will try to understand. 
So the roadmap to learn about generative AI in 2024 is that first of all, you start with one programming language. I will again prefer Python programming language because trust me any LLM models that is probably coming from hugging face to open AI to Mistral everywhere Python SDK is there, right? So Python SDK with the help of Python programming language, you will be able to access those API's, you'll be able to probably work on them, you will be able to probably implement any kind of applications. And not only that, you'll also be able to deploy because they are good cloud platforms that are specifically come with respect to that. Okay, so here, Python programming language, I have given this particular link, see, I've already created Python programming language in English in Hindi, we'll be able to probably find out all this particular videos. I've already shown you this particular videos, good videos, if you probably see there are millions and millions of views with respect to this all videos. Again at the end of the day, this will be super beneficial for everyone out there, right? And not only this Python, if you just want to also become a Python programmer, everything is covered from modular coding to inheritance to oops constant, everything is probably explained along with materials. So this is the first thing. Now here, in third and fourth, I have also added some frameworks playlist, right? Like what like Flask playlist, fast API tutorials and all, right? So these are frameworks, streamlet is one framework, different different frameworks you can specifically use, okay? It is up to you. See, I've created videos on streamlet also I've created videos on Gradio also, but I've just given two frameworks over here Flask and fast API to just give you an idea. If you search for streamlet in my YouTube channel, you'll be able to get those videos also. So Python is the first thing, okay? 
Usually if you are from another like other like core, if you are directly a developer, you have working in applications, you're creating like full stack web developer, you're having those kind of position. And let's say someone comes and tells you, okay, go ahead and apply this generative way. I think in our project, try to create a chat bot, try to create something. At that time, you don't require Python, whatever will be a core language like JavaScript or you can go ahead with that because that kind of SDK is also provided. But this is specifically with respect to the data field. Now coming to the next thing, see, over here, the roadmap that I'm talking about generative AI, this is with respect to LLM models, I'm not talking about large image models, okay? What I feel is that computer vision, large image models is also good. But the kind of kind of update, upgradation that are specifically happening is with respect to LLM models. This is the one thing that I'm talking about this roadmap, I really want to make it for understanding like how you can become better in generative AI in the field of LLM applications, okay, large language model. Whenever I say large language model, I'm basically talking about NLP. Okay? I am not exploring much in large image models because computer vision, I too don't have that much interest. So in my YouTube channel, so you'll be finding videos that I have not uploaded much with respect to computer vision, okay? But I'm really interested in NLP. So after completing Python, you will basically be having the basic machine learning natural language processing. Again, this is basically the prerequisites, okay? 
And here I've given, I've taken already natural language processing live session where I've covered in five days all these topics, YNLP, one-hot encoding, bag of words, TF-IDF, word to beck, average word to beck, and there are a lot many topics that I've actually covered, which you will be able to find over here from day one to day five. So if I probably go ahead and open this particular video here from day one to day five, you'll be getting everything like word embedding, C bow, what is skip gram, word to beck. This five video will be able to help you understand because see, this is a very good roadmap, a prerequisite, a short roadmap. I'm not going to make a very big roadmap because this concept is basically used so that you understand them. What is vectors? You know, how we convert text into a numerical variable. All these things will be important for your interview because in the interview, they'll not directly are generated way, I think. They'll first of all see how good your basics is, right? So considering this, here I could add more topics over here based on this day one to day five, but your task is as a prerequisite is to basically see this day one to day five. And this also includes practical implementation. Now the third day, third part is that you need to cover basic deep learning concepts. Now when I say basic deep learning concepts, this is something related to ANN, like how does a multilayer neural network work? What is perceptron? What is forward propagation, backward propagation, activation function, loss function optimizers? What is weight initialization techniques? What is vanishing gradient problem? Everything right over here. Again I have not written much topic so that you don't look, it does not look very big for you and you don't get demotivated. But all these concepts, again I have made a live playlist in my community series that is regarding day one to day five. So if you probably open this link, okay. 
So here you'll be able to see day one to day five. After day five, if I see this deep learning concept. So what I will do, I will open my YouTube channel and I'll write Krishnayak live deep learning, okay. So I will update those link over there. And here you have, right, so this is basically your live deep learning sessions, okay. So here you can probably see this day one to day five. This is basically the prerequisite, right. All the important things, how does forward propagation work? What is chain rule of derivatives? What exactly is optimizers? What exactly is loss function? What is forward propagation, backward propagation? The CNN I have actually implemented and shown in the practical way. So this link I will try to update it over here, right. So in basic deep learning concept, this is must. Because unless you don't understand that, the further topics will not be able to understand. This is the smartest way that I've actually created a roadmap where unnecessary jargons need not be added, okay. Then after you complete this, then we go to the advanced NLP concept. Now advanced NLP concept is nothing but day six to last video. Which one? If you probably click this link, here you'll be able to see six to entire tilt transformer. So here you can see day six, day seven. Here I've discussed about RNN. Here I've discussed about back propagation in RNN, LHTMRNN, word embedding, LHTM practical implementation, advanced LNM series, bidirectional LHTM, transformers, encoder, decoder. Everything is basically covered in all the specific videos. So if you are able to cover this from day six to probably 13th or 14th video, all these topics will be easily covered. If you are not able to find some topic, go and search for that topic with my name over there. I have a lot of videos. I have 1800 plus videos in my YouTube channel. 
When I've covered everything and this is what I've done from past three years, all the basics concepts have made strong so that you can clear the interview. There is no query that you cannot clear the interview guys. The kind of content that I've put in my YouTube channel is completely from free scratch, everything from basics so that you learn any advanced thing. It will be very much easy for you to crack the interview. So that is what my advanced NLP concept says where my main aim was to cover the transformer because after that, whenever you start your generative AI, that basically means most of your models are in the form of transformers or BERT, right? So here you have GPT-4, Mistral 7B. So here is where you probably start your journey. This is what is the prerequisite till here, right? And that is the reason I've created this prerequisite in this specific way. And it is simple. It's more about learn to the point, right? Don't waste your time much. Learn to the point, see some practical implementation because I don't think so this kind of practical implementation you will be doing in the industry also. But yes, in the interview, they may ask you now for those people who are directly developer, they can also jump in this particular topic, right? They can directly start from here because at the end of the day, they are very good at development, you know, who are full stack web developer who are working in the software engineering field, they can directly use these APIs and they can implement it. Now if I talk about some important things over here, right? Starting the journey towards generative AI. You need to really be very much open minded in probably doing a research on all these models, right? So Mistral 7b, Lama, Lama index, hugging face, open source libraries, Google Palm model and all, right? I've written over here because there are a lot of updates that are coming with respect to the specific models. 
And I'll tell you GPT-4 is must, hugging face is must, Lama, Mistral 7b, Mistral 7b has now recently come up with one amazing model. Again, we say it as a 87 cross B, right? Something like that. I'll be talking more about those models as we go ahead, right? And also try to see open source LNM models because with all this help of these models, you know, you can develop any business use cases that are specifically related to NLP, right? NLP, natural language processing, any use cases, chatbot to text summarization to documentation, anything, quiz, anything. So that is specifically required for the companies, you will be able to do this, right? And that is the reason why many companies are specifically using it. Large image models also you can use because all this large image models they'll be providing your diffusion models, they'll be providing your Dali, right? And based on that, you can do all further on top of it. But really, I'm focusing more on LLMs right now. So the first thing is OpenAI. It has amazing documentation link. Have already created videos, good playlist of videos over here, you can definitely look it out. And this documentation you really need to be good at. And the best thing about this documentation in OpenAI, it provides a good amazing things, right? OpenAI, start with this. Videos are given, you can definitely refer it. This year, I'm going to target LangChain like anything. LangChain, already if you go and see my YouTube playlist, they are on 10 to 12 videos. But LangChain is one amazing library that acts as a wrapper on top of OpenAI. It can also use hugging face. It has so many different functionalities to create all the LLM models, not only this. So LangChain also provides deployment techniques like LangServ, chain rest as rest APIs. This is nothing but it is entirely called as LangSmith. So you can probably see this is your LangChain application. It may be in Python, it may be in JavaScript. 
On top of that, you will be creating some template, you will be doing the deployment in the form of APIs by using this LangSmith. So going forward, I really want to probably see this entire tool and deployment techniques also and it is very much it is recently been announced, right? Why I like LangChain is that because it has so many different functionalities over here, right? Different functionalities from chatbots to prompts to modules to probably if you go and see right, what is LangServ will be releasing a hosted version of LangServ from one click deployment. Just imagine just one click deployment. All these things we are going to explore like anything as we go ahead, right? In LLMs, what exactly is unsync API, dissolve, prompting, most of the videos that I have already created, but every day I see something is getting added in this documentation, right? Over here only, if you probably see with respect to retrieval, right? In document loader, like which all documents will be able to load like CSV, file directory, HTML, JSON, markdown, PDF, document transformer, how do you split the documents and all, everything. But most of this everything will be combined in the form of a project in the end to end project where I do the deployment in an amazing way, right? All those things will basically be covered and here is a detailed video. After I get a good expertise on LangChain, I will also be starting with Chainlit. So Chainlit is another one documentation which looks very good over here, right? Good you will be able to do everything with the help of LangChain, Lama index, here also it is supported. You will also be able to do the deployment, right? Everything is there. You can do it in AWS, Azure, Google route, Replate, render, fly.io, hugging face spaces, okay? So once you probably cover this, right now in these two years you can probably target OpenAI and LangChain and Chainlit, okay? 
That is what I am actually going to do and based on this I will try to create a lot of videos, okay? LangChain, again you can interact with hugging face models, Lama, Mistral 7b, anything as such, whatever you want, okay? The next topic that you really need to focus is about vector databases and vector stores. Now vector database will play a very important role and again this will again be a part of LangChain itself because in LangChain it provides you functionalities. Some of the vector databases that I have already explored is like ChromaDB, FIAS. It is FIAS vector database is nothing but which makes use of Facebook AI similarity search library, right? So this is coming from Facebook. LandDB vector database based on the Lange data format. So there is a format which is called as Lange data set and again there you will be able to apply similarity search. Cassandra DB for storing vectors, Cassandra DB is also amazing, MongoDB is also amazing, right? At the end of the day, if you have any text you really want to convert that into vectors and since it is for the performance sake you really need to store it in some kind of vector database so that you can retrieve it and enjoy it. Then finally after doing this you have to probably do the deployment of LLM projects and this is what I am going to target. Within one month you will find all these videos in my YouTube channel, right? In AWS, Azure, LangSmith, LangSpa, HuggingFace, wherever you want to do the deployment. And finally guys if you really want to go ahead and check like how does, what is GenitiveAI, whether GenitiveAI is there for nano. I have given the course link over here, okay? This is a free community course where everybody can probably apply for it. You will find all the videos, materials and all and it is live series. Right now we are in day 5, day 6. So please go ahead and check it out, it is completely for free. See whether GenitiveAI is for you or not. 
And I hope if you are already in the data field I feel you really need to go towards GenitiveAI, right? At the end of the day. Because for the interview sake, yes, basic questions will be asked. But when you implement things in the production level, this all will be very much handy. And this is what is my main aim in 2024. I hope you like this particular video. Yes, please go ahead. Right now this is in private, I will make it public. So let me go ahead and make it public. So I will change it to public so that you can also access it. Make this repository public. And now one final thing is that I will also update this specific link of deep learning. So I hope you like this particular video. This was it from my side. I will see you all in the next video. Have a great day. Thank you and all. Take care. Bye bye."""


# Run the full split -> summarize -> combine pipeline on the transcript held
# in `text` and print the resulting summary.
formatted_summary = main(text)
print(formatted_summary)


In [None]:
# Without preprocessing, i.e. punctuation is not removed.
from transformers import BartTokenizer, BartForConditionalGeneration

# Checkpoint name plus the tokenizer and model loaded from it. These are
# module-level names that summarize_text() below reads directly.
model_name = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

def summarize_text(text, max_length=1000, min_length=500):
    """Summarize ``text`` with the module-level BART tokenizer and model.

    The input is tokenized with ``max_length=1024`` and ``truncation=True``,
    so any content past that token limit is dropped before generation and
    does not influence the summary.

    Args:
        text: The text to summarize.
        max_length: Passed to ``model.generate`` as the maximum generated length.
        min_length: Passed to ``model.generate`` as the minimum generated length.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when
        ``text`` is an empty or whitespace-only string.
    """
    # Nothing to summarize: skip generation, which would otherwise still be
    # asked to produce at least `min_length` of output from an empty input.
    if isinstance(text, str) and not text.strip():
        return ""

    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example usage
text = """Hello all, my name is Krushnayak and welcome to my YouTube channel. So guys before the start of every new year, I usually plan that what all skill sets I really need to add in my bucket list so that I'll be able to teach you all. I will be also able to show you that how specifically it is used in industries. In 2023, you know, I had targeted various MLops platform and if you probably see my videos in my YouTube channel, I've covered a lot of end to end projects. I have covered a lot many MLops platform tools, you know, and I've shown each and everything, which was really beneficial for many people out there who really want to make successful career transition into the data science industry. And MLops, I think in 2024 also it will play a very important role since right now LLM applications are basically coming up, you know, generative AI is there. So many tools in generative AI have actually come frameworks, libraries and many more things. So MLops will still be there in 2024. But my main aim in 2023 was to understand all about different MLops platform and try to create videos, try to upgrade myself with respect to all those skill sets and provide those videos to you. This was very beneficial. Many people were able to make successful career transition. Now in 2024, I'm also going to focus. See in 2023 also from past three to four months, I'm really much focused in generative AI solving amazing use cases, trying to see multiple frameworks, trying to see multiple LLM models, how you can fine tune it, how you can probably use it in business use cases. And based on that, I'm also creating projects. But majorly in 2024, right, the weightage between MLops and generative AI, I will try to put my most of the weightage in understanding different, different frameworks, cloud platforms, techniques in business use cases in the field of generative AI. 
So in this video, like if you are also interested and obviously you can see the growth in generative area, how it is happening every other day, some new things are actually coming, right? Let it be models, let it be LLM models, the recent launch of Google Gemini. I know that Gemini did not provide you the right kind of demo video. But other than that, you have Mistral 7B, you have OpenAI, GPD for turbo, right? All these models are specifically coming. Now people are using this in creating amazing AI applications. And you can see there are a lot of startups from image to text, text to image. There are so many different things. I was just seeing one AI platform today, you know, it is basically called as Pickalabs. You just give a prompt and it will try to create an amazing video for you, right? And just imagine you don't even require a video editing any person over there, right? And this is super easy, right? When you probably see this kind of application. So 2024, I'm going to completely focus most of my, if I say 100% from 100%, I'm going to focus 60 to 70% in generative AI and 30% with respect to MLOps platform. And don't worry, every videos that I probably learn will be coming in the form of YouTube channel. But in this video, if you are also really interested to understand about generative AI, I've created an amazing roadmap to learn generative AI in 2024. And this will be specifically for two kinds of roles, which I'm actually going to discuss. And this roadmap, trust me, it has some prerequisites. I'm also going to provide you some videos over here, because if you just cover those prerequisites, that will be more than sufficient to start with generative AI. Yeah. And this is for all those people who are really interested to work in the data field, right? Data analytics field. If you're a developer, if you probably want to work into the generative AI, I think you don't even require this prerequisite. 
And I'll also call out that specific information over here in my GitHub. Once I probably show you the specific roadmap. So let me go ahead and let me share my screen. So this is the roadmap to learn generative AI. So here you can probably see everything is there. I've provided videos link, I have everything, the prerequisites that is actually required. Everything is mentioned over here. And please make sure that you fork this repository, keep a star on it, because 2024, I am going to really create a lot many things over here. It'll be quite amazing, right? Many, many things will be coming with respect to projects, with respect to frameworks, with respect to what are things I'm going to specifically cover in Lanch in everything, right? So let's start and let's understand this particular roadmap. Before you go ahead, please make sure that you hit like for this particular video will target at 2000 likes at least, you know, because this will be super beneficial because here you're going to get all the videos, all the materials, everything in front of you with respect to the prerequisites. Now I will talk about two different profiles over here. One profile is that if you want to get into the data analytics industry, and let's say you want to probably start if you're starting from scratch, okay, starting from scratch, or from past two years, you're also already in the data analytics field, you're learning about data science, what kind of upgradation you can basically do. The second type of people will be core developers, right? So in my company also, we have used generative AI, we are creating support systems, we are creating assignment, Q&A systems, many more things are there. Right now those developers are specifically creating and they don't have need to have all the knowledge with respect to some kind of prerequisites as such directly they can start with generative AI. So I will be talking about both of them step by step will try to understand. 
So the roadmap to learn about generative AI in 2024 is that first of all, you start with one programming language. I will again prefer Python programming language because trust me any LLM models that is probably coming from hugging face to open AI to Mistral everywhere Python SDK is there, right? So Python SDK with the help of Python programming language, you will be able to access those API's, you'll be able to probably work on them, you will be able to probably implement any kind of applications. And not only that, you'll also be able to deploy because they are good cloud platforms that are specifically come with respect to that. Okay, so here, Python programming language, I have given this particular link, see, I've already created Python programming language in English in Hindi, we'll be able to probably find out all this particular videos. I've already shown you this particular videos, good videos, if you probably see there are millions and millions of views with respect to this all videos. Again at the end of the day, this will be super beneficial for everyone out there, right? And not only this Python, if you just want to also become a Python programmer, everything is covered from modular coding to inheritance to oops constant, everything is probably explained along with materials. So this is the first thing. Now here, in third and fourth, I have also added some frameworks playlist, right? Like what like Flask playlist, fast API tutorials and all, right? So these are frameworks, streamlet is one framework, different different frameworks you can specifically use, okay? It is up to you. See, I've created videos on streamlet also I've created videos on Gradio also, but I've just given two frameworks over here Flask and fast API to just give you an idea. If you search for streamlet in my YouTube channel, you'll be able to get those videos also. So Python is the first thing, okay? 
Usually if you are from another like other like core, if you are directly a developer, you have working in applications, you're creating like full stack web developer, you're having those kind of position. And let's say someone comes and tells you, okay, go ahead and apply this generative way. I think in our project, try to create a chat bot, try to create something. At that time, you don't require Python, whatever will be a core language like JavaScript or you can go ahead with that because that kind of SDK is also provided. But this is specifically with respect to the data field. Now coming to the next thing, see, over here, the roadmap that I'm talking about generative AI, this is with respect to LLM models, I'm not talking about large image models, okay? What I feel is that computer vision, large image models is also good. But the kind of kind of update, upgradation that are specifically happening is with respect to LLM models. This is the one thing that I'm talking about this roadmap, I really want to make it for understanding like how you can become better in generative AI in the field of LLM applications, okay, large language model. Whenever I say large language model, I'm basically talking about NLP. Okay? I am not exploring much in large image models because computer vision, I too don't have that much interest. So in my YouTube channel, so you'll be finding videos that I have not uploaded much with respect to computer vision, okay? But I'm really interested in NLP. So after completing Python, you will basically be having the basic machine learning natural language processing. Again, this is basically the prerequisites, okay? 
And here I've given, I've taken already natural language processing live session where I've covered in five days all these topics, YNLP, one-hot encoding, bag of words, TF-IDF, word to beck, average word to beck, and there are a lot many topics that I've actually covered, which you will be able to find over here from day one to day five. So if I probably go ahead and open this particular video here from day one to day five, you'll be getting everything like word embedding, C bow, what is skip gram, word to beck. This five video will be able to help you understand because see, this is a very good roadmap, a prerequisite, a short roadmap. I'm not going to make a very big roadmap because this concept is basically used so that you understand them. What is vectors? You know, how we convert text into a numerical variable. All these things will be important for your interview because in the interview, they'll not directly are generated way, I think. They'll first of all see how good your basics is, right? So considering this, here I could add more topics over here based on this day one to day five, but your task is as a prerequisite is to basically see this day one to day five. And this also includes practical implementation. Now the third day, third part is that you need to cover basic deep learning concepts. Now when I say basic deep learning concepts, this is something related to ANN, like how does a multilayer neural network work? What is perceptron? What is forward propagation, backward propagation, activation function, loss function optimizers? What is weight initialization techniques? What is vanishing gradient problem? Everything right over here. Again I have not written much topic so that you don't look, it does not look very big for you and you don't get demotivated. But all these concepts, again I have made a live playlist in my community series that is regarding day one to day five. So if you probably open this link, okay. 
So here you'll be able to see day one to day five. After day five, if I see this deep learning concept. So what I will do, I will open my YouTube channel and I'll write Krishnayak live deep learning, okay. So I will update those link over there. And here you have, right, so this is basically your live deep learning sessions, okay. So here you can probably see this day one to day five. This is basically the prerequisite, right. All the important things, how does forward propagation work? What is chain rule of derivatives? What exactly is optimizers? What exactly is loss function? What is forward propagation, backward propagation? The CNN I have actually implemented and shown in the practical way. So this link I will try to update it over here, right. So in basic deep learning concept, this is must. Because unless you don't understand that, the further topics will not be able to understand. This is the smartest way that I've actually created a roadmap where unnecessary jargons need not be added, okay. Then after you complete this, then we go to the advanced NLP concept. Now advanced NLP concept is nothing but day six to last video. Which one? If you probably click this link, here you'll be able to see six to entire tilt transformer. So here you can see day six, day seven. Here I've discussed about RNN. Here I've discussed about back propagation in RNN, LHTMRNN, word embedding, LHTM practical implementation, advanced LNM series, bidirectional LHTM, transformers, encoder, decoder. Everything is basically covered in all the specific videos. So if you are able to cover this from day six to probably 13th or 14th video, all these topics will be easily covered. If you are not able to find some topic, go and search for that topic with my name over there. I have a lot of videos. I have 1800 plus videos in my YouTube channel. 
When I've covered everything and this is what I've done from past three years, all the basics concepts have made strong so that you can clear the interview. There is no query that you cannot clear the interview guys. The kind of content that I've put in my YouTube channel is completely from free scratch, everything from basics so that you learn any advanced thing. It will be very much easy for you to crack the interview. So that is what my advanced NLP concept says where my main aim was to cover the transformer because after that, whenever you start your generative AI, that basically means most of your models are in the form of transformers or BERT, right? So here you have GPT-4, Mistral 7B. So here is where you probably start your journey. This is what is the prerequisite till here, right? And that is the reason I've created this prerequisite in this specific way. And it is simple. It's more about learn to the point, right? Don't waste your time much. Learn to the point, see some practical implementation because I don't think so this kind of practical implementation you will be doing in the industry also. But yes, in the interview, they may ask you now for those people who are directly developer, they can also jump in this particular topic, right? They can directly start from here because at the end of the day, they are very good at development, you know, who are full stack web developer who are working in the software engineering field, they can directly use these APIs and they can implement it. Now if I talk about some important things over here, right? Starting the journey towards generative AI. You need to really be very much open minded in probably doing a research on all these models, right? So Mistral 7b, Lama, Lama index, hugging face, open source libraries, Google Palm model and all, right? I've written over here because there are a lot of updates that are coming with respect to the specific models. 
And I'll tell you GPT-4 is must, hugging face is must, Lama, Mistral 7b, Mistral 7b has now recently come up with one amazing model. Again, we say it as a 87 cross B, right? Something like that. I'll be talking more about those models as we go ahead, right? And also try to see open source LNM models because with all this help of these models, you know, you can develop any business use cases that are specifically related to NLP, right? NLP, natural language processing, any use cases, chatbot to text summarization to documentation, anything, quiz, anything. So that is specifically required for the companies, you will be able to do this, right? And that is the reason why many companies are specifically using it. Large image models also you can use because all this large image models they'll be providing your diffusion models, they'll be providing your Dali, right? And based on that, you can do all further on top of it. But really, I'm focusing more on LLMs right now. So the first thing is OpenAI. It has amazing documentation link. Have already created videos, good playlist of videos over here, you can definitely look it out. And this documentation you really need to be good at. And the best thing about this documentation in OpenAI, it provides a good amazing things, right? OpenAI, start with this. Videos are given, you can definitely refer it. This year, I'm going to target LangChain like anything. LangChain, already if you go and see my YouTube playlist, they are on 10 to 12 videos. But LangChain is one amazing library that acts as a wrapper on top of OpenAI. It can also use hugging face. It has so many different functionalities to create all the LLM models, not only this. So LangChain also provides deployment techniques like LangServ, chain rest as rest APIs. This is nothing but it is entirely called as LangSmith. So you can probably see this is your LangChain application. It may be in Python, it may be in JavaScript. 
On top of that, you will be creating some template, you will be doing the deployment in the form of APIs by using this LangSmith. So going forward, I really want to probably see this entire tool and deployment techniques also and it is very much it is recently been announced, right? Why I like LangChain is that because it has so many different functionalities over here, right? Different functionalities from chatbots to prompts to modules to probably if you go and see right, what is LangServ will be releasing a hosted version of LangServ from one click deployment. Just imagine just one click deployment. All these things we are going to explore like anything as we go ahead, right? In LLMs, what exactly is unsync API, dissolve, prompting, most of the videos that I have already created, but every day I see something is getting added in this documentation, right? Over here only, if you probably see with respect to retrieval, right? In document loader, like which all documents will be able to load like CSV, file directory, HTML, JSON, markdown, PDF, document transformer, how do you split the documents and all, everything. But most of this everything will be combined in the form of a project in the end to end project where I do the deployment in an amazing way, right? All those things will basically be covered and here is a detailed video. After I get a good expertise on LangChain, I will also be starting with Chainlit. So Chainlit is another one documentation which looks very good over here, right? Good you will be able to do everything with the help of LangChain, Lama index, here also it is supported. You will also be able to do the deployment, right? Everything is there. You can do it in AWS, Azure, Google route, Replate, render, fly.io, hugging face spaces, okay? So once you probably cover this, right now in these two years you can probably target OpenAI and LangChain and Chainlit, okay? 
That is what I am actually going to do and based on this I will try to create a lot of videos, okay? LangChain, again you can interact with hugging face models, Lama, Mistral 7b, anything as such, whatever you want, okay? The next topic that you really need to focus is about vector databases and vector stores. Now vector database will play a very important role and again this will again be a part of LangChain itself because in LangChain it provides you functionalities. Some of the vector databases that I have already explored is like ChromaDB, FIAS. It is FIAS vector database is nothing but which makes use of Facebook AI similarity search library, right? So this is coming from Facebook. LandDB vector database based on the Lange data format. So there is a format which is called as Lange data set and again there you will be able to apply similarity search. Cassandra DB for storing vectors, Cassandra DB is also amazing, MongoDB is also amazing, right? At the end of the day, if you have any text you really want to convert that into vectors and since it is for the performance sake you really need to store it in some kind of vector database so that you can retrieve it and enjoy it. Then finally after doing this you have to probably do the deployment of LLM projects and this is what I am going to target. Within one month you will find all these videos in my YouTube channel, right? In AWS, Azure, LangSmith, LangSpa, HuggingFace, wherever you want to do the deployment. And finally guys if you really want to go ahead and check like how does, what is GenitiveAI, whether GenitiveAI is there for nano. I have given the course link over here, okay? This is a free community course where everybody can probably apply for it. You will find all the videos, materials and all and it is live series. Right now we are in day 5, day 6. So please go ahead and check it out, it is completely for free. See whether GenitiveAI is for you or not. 
And I hope if you are already in the data field I feel you really need to go towards GenitiveAI, right? At the end of the day. Because for the interview sake, yes, basic questions will be asked. But when you implement things in the production level, this all will be very much handy. And this is what is my main aim in 2024. I hope you like this particular video. Yes, please go ahead. Right now this is in private, I will make it public. So let me go ahead and make it public. So I will change it to public so that you can also access it. Make this repository public. And now one final thing is that I will also update this specific link of deep learning. So I hope you like this particular video. This was it from my side. I will see you all in the next video. Have a great day. Thank you and all. Take care. Bye bye."""

# Summarize the transcript held in `text` using summarize_text's default
# length settings and print the result.
summary = summarize_text(text)
print(summary)


In [None]:
# With preprocessing (whitespace collapsed, punctuation removed, lowercased)
import re
from transformers import BartTokenizer, BartForConditionalGeneration

# Checkpoint identifier; the tokenizer and the model are both loaded from it
# so that they stay matched.
model_name = 'facebook/bart-large-cnn'
# Module-level tokenizer and conditional-generation model used by summarize_text below.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

def preprocess_text(text):
    """Normalize ``text``: strip punctuation, collapse whitespace, lowercase.

    Punctuation here means every character that is neither a word character
    (``\\w``, which includes digits and the underscore) nor whitespace.

    Args:
        text: The raw string to normalize.

    Returns:
        The lowercased string with punctuation removed and every run of
        whitespace (spaces, tabs, newlines) replaced by a single space.
        Leading and trailing whitespace is collapsed but not stripped.
    """
    # Remove punctuation first: deleting a standalone mark such as the "-" in
    # "a - b" leaves two adjacent spaces, so whitespace must be collapsed
    # afterwards for the single-space guarantee to hold.
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces/newlines with a single space
    return text.lower()  # Convert to lowercase

def summarize_text(text, max_length=1000, min_length=500):
    """Preprocess ``text`` and summarize it with the module-level BART model.

    The text is first normalized with ``preprocess_text`` and then tokenized
    with ``max_length=1024`` and ``truncation=True``, so any content past that
    token limit is dropped before generation and does not influence the
    summary.

    Args:
        text: The raw text to summarize.
        max_length: Passed to ``model.generate`` as the maximum generated length.
        min_length: Passed to ``model.generate`` as the minimum generated length.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when
        ``text`` is blank before or after preprocessing.
    """
    # Nothing to summarize: skip preprocessing and generation entirely.
    if isinstance(text, str) and not text.strip():
        return ""

    preprocessed_text = preprocess_text(text)
    # Punctuation-only input becomes blank after preprocessing; generation
    # would otherwise still be asked for at least `min_length` of output.
    if not preprocessed_text.strip():
        return ""

    inputs = tokenizer(preprocessed_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask=inputs['attention_mask'],
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example usage
text = """Hello all, my name is Krushnayak and welcome to my YouTube channel. So guys before the start of every new year, I usually plan that what all skill sets I really need to add in my bucket list so that I'll be able to teach you all. I will be also able to show you that how specifically it is used in industries. In 2023, you know, I had targeted various MLops platform and if you probably see my videos in my YouTube channel, I've covered a lot of end to end projects. I have covered a lot many MLops platform tools, you know, and I've shown each and everything, which was really beneficial for many people out there who really want to make successful career transition into the data science industry. And MLops, I think in 2024 also it will play a very important role since right now LLM applications are basically coming up, you know, generative AI is there. So many tools in generative AI have actually come frameworks, libraries and many more things. So MLops will still be there in 2024. But my main aim in 2023 was to understand all about different MLops platform and try to create videos, try to upgrade myself with respect to all those skill sets and provide those videos to you. This was very beneficial. Many people were able to make successful career transition. Now in 2024, I'm also going to focus. See in 2023 also from past three to four months, I'm really much focused in generative AI solving amazing use cases, trying to see multiple frameworks, trying to see multiple LLM models, how you can fine tune it, how you can probably use it in business use cases. And based on that, I'm also creating projects. But majorly in 2024, right, the weightage between MLops and generative AI, I will try to put my most of the weightage in understanding different, different frameworks, cloud platforms, techniques in business use cases in the field of generative AI. 
So in this video, like if you are also interested and obviously you can see the growth in generative area, how it is happening every other day, some new things are actually coming, right? Let it be models, let it be LLM models, the recent launch of Google Gemini. I know that Gemini did not provide you the right kind of demo video. But other than that, you have Mistral 7B, you have OpenAI, GPD for turbo, right? All these models are specifically coming. Now people are using this in creating amazing AI applications. And you can see there are a lot of startups from image to text, text to image. There are so many different things. I was just seeing one AI platform today, you know, it is basically called as Pickalabs. You just give a prompt and it will try to create an amazing video for you, right? And just imagine you don't even require a video editing any person over there, right? And this is super easy, right? When you probably see this kind of application. So 2024, I'm going to completely focus most of my, if I say 100% from 100%, I'm going to focus 60 to 70% in generative AI and 30% with respect to MLOps platform. And don't worry, every videos that I probably learn will be coming in the form of YouTube channel. But in this video, if you are also really interested to understand about generative AI, I've created an amazing roadmap to learn generative AI in 2024. And this will be specifically for two kinds of roles, which I'm actually going to discuss. And this roadmap, trust me, it has some prerequisites. I'm also going to provide you some videos over here, because if you just cover those prerequisites, that will be more than sufficient to start with generative AI. Yeah. And this is for all those people who are really interested to work in the data field, right? Data analytics field. If you're a developer, if you probably want to work into the generative AI, I think you don't even require this prerequisite. 
And I'll also call out that specific information over here in my GitHub. Once I probably show you the specific roadmap. So let me go ahead and let me share my screen. So this is the roadmap to learn generative AI. So here you can probably see everything is there. I've provided videos link, I have everything, the prerequisites that is actually required. Everything is mentioned over here. And please make sure that you fork this repository, keep a star on it, because 2024, I am going to really create a lot many things over here. It'll be quite amazing, right? Many, many things will be coming with respect to projects, with respect to frameworks, with respect to what are things I'm going to specifically cover in Lanch in everything, right? So let's start and let's understand this particular roadmap. Before you go ahead, please make sure that you hit like for this particular video will target at 2000 likes at least, you know, because this will be super beneficial because here you're going to get all the videos, all the materials, everything in front of you with respect to the prerequisites. Now I will talk about two different profiles over here. One profile is that if you want to get into the data analytics industry, and let's say you want to probably start if you're starting from scratch, okay, starting from scratch, or from past two years, you're also already in the data analytics field, you're learning about data science, what kind of upgradation you can basically do. The second type of people will be core developers, right? So in my company also, we have used generative AI, we are creating support systems, we are creating assignment, Q&A systems, many more things are there. Right now those developers are specifically creating and they don't have need to have all the knowledge with respect to some kind of prerequisites as such directly they can start with generative AI. So I will be talking about both of them step by step will try to understand. 
So the roadmap to learn about generative AI in 2024 is that first of all, you start with one programming language. I will again prefer Python programming language because trust me any LLM models that is probably coming from hugging face to open AI to Mistral everywhere Python SDK is there, right? So Python SDK with the help of Python programming language, you will be able to access those API's, you'll be able to probably work on them, you will be able to probably implement any kind of applications. And not only that, you'll also be able to deploy because they are good cloud platforms that are specifically come with respect to that. Okay, so here, Python programming language, I have given this particular link, see, I've already created Python programming language in English in Hindi, we'll be able to probably find out all this particular videos. I've already shown you this particular videos, good videos, if you probably see there are millions and millions of views with respect to this all videos. Again at the end of the day, this will be super beneficial for everyone out there, right? And not only this Python, if you just want to also become a Python programmer, everything is covered from modular coding to inheritance to oops constant, everything is probably explained along with materials. So this is the first thing. Now here, in third and fourth, I have also added some frameworks playlist, right? Like what like Flask playlist, fast API tutorials and all, right? So these are frameworks, streamlet is one framework, different different frameworks you can specifically use, okay? It is up to you. See, I've created videos on streamlet also I've created videos on Gradio also, but I've just given two frameworks over here Flask and fast API to just give you an idea. If you search for streamlet in my YouTube channel, you'll be able to get those videos also. So Python is the first thing, okay? 
Usually if you are from another like other like core, if you are directly a developer, you have working in applications, you're creating like full stack web developer, you're having those kind of position. And let's say someone comes and tells you, okay, go ahead and apply this generative way. I think in our project, try to create a chat bot, try to create something. At that time, you don't require Python, whatever will be a core language like JavaScript or you can go ahead with that because that kind of SDK is also provided. But this is specifically with respect to the data field. Now coming to the next thing, see, over here, the roadmap that I'm talking about generative AI, this is with respect to LLM models, I'm not talking about large image models, okay? What I feel is that computer vision, large image models is also good. But the kind of kind of update, upgradation that are specifically happening is with respect to LLM models. This is the one thing that I'm talking about this roadmap, I really want to make it for understanding like how you can become better in generative AI in the field of LLM applications, okay, large language model. Whenever I say large language model, I'm basically talking about NLP. Okay? I am not exploring much in large image models because computer vision, I too don't have that much interest. So in my YouTube channel, so you'll be finding videos that I have not uploaded much with respect to computer vision, okay? But I'm really interested in NLP. So after completing Python, you will basically be having the basic machine learning natural language processing. Again, this is basically the prerequisites, okay? 
And here I've given, I've taken already natural language processing live session where I've covered in five days all these topics, YNLP, one-hot encoding, bag of words, TF-IDF, word to beck, average word to beck, and there are a lot many topics that I've actually covered, which you will be able to find over here from day one to day five. So if I probably go ahead and open this particular video here from day one to day five, you'll be getting everything like word embedding, C bow, what is skip gram, word to beck. This five video will be able to help you understand because see, this is a very good roadmap, a prerequisite, a short roadmap. I'm not going to make a very big roadmap because this concept is basically used so that you understand them. What is vectors? You know, how we convert text into a numerical variable. All these things will be important for your interview because in the interview, they'll not directly are generated way, I think. They'll first of all see how good your basics is, right? So considering this, here I could add more topics over here based on this day one to day five, but your task is as a prerequisite is to basically see this day one to day five. And this also includes practical implementation. Now the third day, third part is that you need to cover basic deep learning concepts. Now when I say basic deep learning concepts, this is something related to ANN, like how does a multilayer neural network work? What is perceptron? What is forward propagation, backward propagation, activation function, loss function optimizers? What is weight initialization techniques? What is vanishing gradient problem? Everything right over here. Again I have not written much topic so that you don't look, it does not look very big for you and you don't get demotivated. But all these concepts, again I have made a live playlist in my community series that is regarding day one to day five. So if you probably open this link, okay. 
So here you'll be able to see day one to day five. After day five, if I see this deep learning concept. So what I will do, I will open my YouTube channel and I'll write Krishnayak live deep learning, okay. So I will update those link over there. And here you have, right, so this is basically your live deep learning sessions, okay. So here you can probably see this day one to day five. This is basically the prerequisite, right. All the important things, how does forward propagation work? What is chain rule of derivatives? What exactly is optimizers? What exactly is loss function? What is forward propagation, backward propagation? The CNN I have actually implemented and shown in the practical way. So this link I will try to update it over here, right. So in basic deep learning concept, this is must. Because unless you don't understand that, the further topics will not be able to understand. This is the smartest way that I've actually created a roadmap where unnecessary jargons need not be added, okay. Then after you complete this, then we go to the advanced NLP concept. Now advanced NLP concept is nothing but day six to last video. Which one? If you probably click this link, here you'll be able to see six to entire tilt transformer. So here you can see day six, day seven. Here I've discussed about RNN. Here I've discussed about back propagation in RNN, LHTMRNN, word embedding, LHTM practical implementation, advanced LNM series, bidirectional LHTM, transformers, encoder, decoder. Everything is basically covered in all the specific videos. So if you are able to cover this from day six to probably 13th or 14th video, all these topics will be easily covered. If you are not able to find some topic, go and search for that topic with my name over there. I have a lot of videos. I have 1800 plus videos in my YouTube channel. 
When I've covered everything and this is what I've done from past three years, all the basics concepts have made strong so that you can clear the interview. There is no query that you cannot clear the interview guys. The kind of content that I've put in my YouTube channel is completely from free scratch, everything from basics so that you learn any advanced thing. It will be very much easy for you to crack the interview. So that is what my advanced NLP concept says where my main aim was to cover the transformer because after that, whenever you start your generative AI, that basically means most of your models are in the form of transformers or BERT, right? So here you have GPT-4, Mistral 7B. So here is where you probably start your journey. This is what is the prerequisite till here, right? And that is the reason I've created this prerequisite in this specific way. And it is simple. It's more about learn to the point, right? Don't waste your time much. Learn to the point, see some practical implementation because I don't think so this kind of practical implementation you will be doing in the industry also. But yes, in the interview, they may ask you now for those people who are directly developer, they can also jump in this particular topic, right? They can directly start from here because at the end of the day, they are very good at development, you know, who are full stack web developer who are working in the software engineering field, they can directly use these APIs and they can implement it. Now if I talk about some important things over here, right? Starting the journey towards generative AI. You need to really be very much open minded in probably doing a research on all these models, right? So Mistral 7b, Lama, Lama index, hugging face, open source libraries, Google Palm model and all, right? I've written over here because there are a lot of updates that are coming with respect to the specific models. 
And I'll tell you GPT-4 is must, hugging face is must, Lama, Mistral 7b, Mistral 7b has now recently come up with one amazing model. Again, we say it as a 87 cross B, right? Something like that. I'll be talking more about those models as we go ahead, right? And also try to see open source LNM models because with all this help of these models, you know, you can develop any business use cases that are specifically related to NLP, right? NLP, natural language processing, any use cases, chatbot to text summarization to documentation, anything, quiz, anything. So that is specifically required for the companies, you will be able to do this, right? And that is the reason why many companies are specifically using it. Large image models also you can use because all this large image models they'll be providing your diffusion models, they'll be providing your Dali, right? And based on that, you can do all further on top of it. But really, I'm focusing more on LLMs right now. So the first thing is OpenAI. It has amazing documentation link. Have already created videos, good playlist of videos over here, you can definitely look it out. And this documentation you really need to be good at. And the best thing about this documentation in OpenAI, it provides a good amazing things, right? OpenAI, start with this. Videos are given, you can definitely refer it. This year, I'm going to target LangChain like anything. LangChain, already if you go and see my YouTube playlist, they are on 10 to 12 videos. But LangChain is one amazing library that acts as a wrapper on top of OpenAI. It can also use hugging face. It has so many different functionalities to create all the LLM models, not only this. So LangChain also provides deployment techniques like LangServ, chain rest as rest APIs. This is nothing but it is entirely called as LangSmith. So you can probably see this is your LangChain application. It may be in Python, it may be in JavaScript. 
On top of that, you will be creating some template, you will be doing the deployment in the form of APIs by using this LangSmith. So going forward, I really want to probably see this entire tool and deployment techniques also and it is very much it is recently been announced, right? Why I like LangChain is that because it has so many different functionalities over here, right? Different functionalities from chatbots to prompts to modules to probably if you go and see right, what is LangServ will be releasing a hosted version of LangServ from one click deployment. Just imagine just one click deployment. All these things we are going to explore like anything as we go ahead, right? In LLMs, what exactly is unsync API, dissolve, prompting, most of the videos that I have already created, but every day I see something is getting added in this documentation, right? Over here only, if you probably see with respect to retrieval, right? In document loader, like which all documents will be able to load like CSV, file directory, HTML, JSON, markdown, PDF, document transformer, how do you split the documents and all, everything. But most of this everything will be combined in the form of a project in the end to end project where I do the deployment in an amazing way, right? All those things will basically be covered and here is a detailed video. After I get a good expertise on LangChain, I will also be starting with Chainlit. So Chainlit is another one documentation which looks very good over here, right? Good you will be able to do everything with the help of LangChain, Lama index, here also it is supported. You will also be able to do the deployment, right? Everything is there. You can do it in AWS, Azure, Google route, Replate, render, fly.io, hugging face spaces, okay? So once you probably cover this, right now in these two years you can probably target OpenAI and LangChain and Chainlit, okay? 
That is what I am actually going to do and based on this I will try to create a lot of videos, okay? LangChain, again you can interact with hugging face models, Lama, Mistral 7b, anything as such, whatever you want, okay? The next topic that you really need to focus is about vector databases and vector stores. Now vector database will play a very important role and again this will again be a part of LangChain itself because in LangChain it provides you functionalities. Some of the vector databases that I have already explored is like ChromaDB, FIAS. It is FIAS vector database is nothing but which makes use of Facebook AI similarity search library, right? So this is coming from Facebook. LandDB vector database based on the Lange data format. So there is a format which is called as Lange data set and again there you will be able to apply similarity search. Cassandra DB for storing vectors, Cassandra DB is also amazing, MongoDB is also amazing, right? At the end of the day, if you have any text you really want to convert that into vectors and since it is for the performance sake you really need to store it in some kind of vector database so that you can retrieve it and enjoy it. Then finally after doing this you have to probably do the deployment of LLM projects and this is what I am going to target. Within one month you will find all these videos in my YouTube channel, right? In AWS, Azure, LangSmith, LangSpa, HuggingFace, wherever you want to do the deployment. And finally guys if you really want to go ahead and check like how does, what is GenitiveAI, whether GenitiveAI is there for nano. I have given the course link over here, okay? This is a free community course where everybody can probably apply for it. You will find all the videos, materials and all and it is live series. Right now we are in day 5, day 6. So please go ahead and check it out, it is completely for free. See whether GenitiveAI is for you or not. 
And I hope if you are already in the data field I feel you really need to go towards GenitiveAI, right? At the end of the day. Because for the interview sake, yes, basic questions will be asked. But when you implement things in the production level, this all will be very much handy. And this is what is my main aim in 2024. I hope you like this particular video. Yes, please go ahead. Right now this is in private, I will make it public. So let me go ahead and make it public. So I will change it to public so that you can also access it. Make this repository public. And now one final thing is that I will also update this specific link of deep learning. So I hope you like this particular video. This was it from my side. I will see you all in the next video. Have a great day. Thank you and all. Take care. Bye bye."""

# The transcript is much longer than the 1024 tokens summarize_text() keeps
# after truncation, so calling it directly would summarize only the opening
# portion and silently ignore the rest. Go through main() instead: it splits
# the text into overlapping sections, summarizes each section, and joins the
# per-section summaries into one result.
summary = main(text)
print(summary)
